{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HTML解析-南方学院新闻 & liepin实践\n",
    "\n",
    "*  本周主要内容：HTML解析（parse HTML）及Xpath实践\n",
    "*  21_Web数据挖掘_week05\n",
    "*  电子讲义设计者:许智超\n",
    "<br/>\n",
    "<br/>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 上周回顾及翻页思考\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import urllib.parse\n",
    "\n",
    "import pandas as pd\n",
    "import requests_html\n",
    "from requests_html import HTMLSession"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A1: fetch the nfu.edu.cn news list page (xxyw).\n",
    "session = HTMLSession()\n",
    "# Bound the wait with a timeout so an unresponsive server cannot hang the kernel.\n",
    "r = session.get(\"https://www.nfu.edu.cn/xxyw/index.htm\", timeout=10)\n",
    "# Fail right here on an HTTP error (4xx/5xx) instead of saving and parsing an error page.\n",
    "r.raise_for_status()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## html 页面数据的存与读"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save: write the fetched page source to a local file.\n",
    "# open(..., mode=\"w\") does not create missing folders, so make sure the output\n",
    "# directory exists first; otherwise a fresh checkout fails with FileNotFoundError.\n",
    "os.makedirs(\"html_out\", exist_ok=True)\n",
    "# NOTE(review): the file name mentions 文学与传媒学院, but the URL fetched earlier is the\n",
    "# xxyw list page - confirm the name is intended (the read cell uses the same path).\n",
    "with open(\"html_out/_nfu_xxyw_文学与传媒学院.html\", encoding=\"utf8\", mode=\"w\") as fp:\n",
    "    fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'<!DOCTYPE html>\\n<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\\n\\t<meta name=\"renderer\" content=\"webkit\">\\n\\t<meta http-equiv=\"x-ua-compatible\" content=\"IE=edge\">\\n\\n\\t<title>学校要闻 - 广州南方学院</title>\\n\\t<meta name=\"keywords\" content=\"\">\\n\\t<meta name=\"description\" content=\"\">\\n\\t\\n\\t<link rel=\"stylesheet\" type=\"text/css\" href=\"../css/swiper-3.3.1.min.css\">\\n\\t<link href=\"../css/lin.css\" rel=\"stylesheet\" type=\"text/css\">\\n\\n\\t<script src=\"../js/jquery-1.11.3.min.js\"></script>\\n\\t<script src=\"../js/jquery-1.11.1.js\"></script>\\n\\t<script src=\"../js/jquery.easie-min.js\" type=\"text/javascript\"></script>\\n\\t<script src=\"../js/swiper.min.js\" type=\"text/javascript\"></script>\\n\\t<script src=\"../js/lin.js\"></script>\\n\\t\\n\\t\\n\\t<link href=\"../css/page.css\" rel=\"stylesheet\" type=\"text/css\">\\n\\t<link href=\"http://www.nfu.edu.cn/Public/favicon.ico\" rel=\"Shortcut Icon\">\\n\\t<link href=\"http://www.nfu.edu.cn/Public/favicon.ico\" rel=\"Bookmark\">\\n\\n<script type=\"text/javascript\"> \\n\\tvar browser={\\n\\t    versions:function(){\\n\\t        var u = navigator.userAgent, app = navigator.appVersion;\\n\\t        return {//移动终端浏览器版本信息\\n\\t            trident: u.indexOf(\\'Trident\\') > -1, //IE内核\\n\\t            presto: u.indexOf(\\'Presto\\') > -1, //opera内核\\n\\t            webKit: u.indexOf(\\'AppleWebKit\\') > -1, //苹果、谷歌内核\\n\\t            gecko: u.indexOf(\\'Gecko\\') > -1 && u.indexOf(\\'KHTML\\') == -1, //火狐内核\\n\\t            mobile: !!u.match(/AppleWebKit.*Mobile.*/)||u.indexOf(\\'iPad\\') > -1, //是否为移动终端\\n\\t            ios: !!u.match(/\\\\(i[^;]+;( U;)? 
CPU.+Mac OS X/), //ios终端\\n\\t            android: u.indexOf(\\'Android\\') > -1 || u.indexOf(\\'Linux\\') > -1, //android终端或者uc浏览器\\n\\t            iPhone: u.indexOf(\\'iPhone\\') > -1, //是否为iPhone或者QQHD浏览器\\n\\t            iPad: u.indexOf(\\'iPad\\') > -1, //是否iPad\\n\\t            webApp: u.indexOf(\\'Safari\\') == -1 //是否web应该程序，没有头部与底部\\n\\t        };\\n\\t    }(),\\n\\t}\\n\\tif(browser.versions.android || browser.versions.iPhone){\\n\\t\\tvar url = document.location.href ;\\n\\t\\tvar ext = url.substr(url.lastIndexOf(\".\"),url.length) ;\\n\\t\\tif(ext != null && (ext == \\'.htm\\' || ext == \\'.html\\')){\\n\\t\\t\\turl = url.substr(0,url.lastIndexOf(\".\")) ;\\n\\t\\t\\turl = url + \"_mobile\" + ext ;\\n\\t\\t}else{\\n\\t\\t\\turl = url + \"index_mobile.htm\" ;\\n\\t\\t}\\n\\t\\tdocument.location = url ;\\n\\t}\\n</script>\\n </head>\\n<body>\\n<!--头部-->\\n\\t\\n<div class=\"lin-header \">\\n\\t\\t<div class=\"lin-head clearfix\">\\n\\t\\t\\t<h1 class=\"lin-topl\">\\n\\t\\t\\t\\t<a href=\"../index.htm\" target=\"_blank\" title=\"广州南方学院\">\\n\\t\\t\\t\\t\\t<img src=\"../images/logo.png\">\\n\\t\\t\\t\\t</a>\\n\\t\\t\\t</h1>\\n\\t\\t\\t<div class=\"lin-topr\">\\n\\t\\t\\t\\t<div class=\"lin-youxiang\">\\n\\t\\t\\t\\t\\t<!--<a href=\"http://oa.nfu.edu.cn/\" target=\"_blank\">办公系统</a>-->\\n\\t\\t\\t\\t\\t<a href=\"http://en.nfu.edu.cn/\">English Version</a>\\n\\t\\t\\t\\t\\t\\t<!-- <a href=\"https://mail.nfu.edu.cn/\" target=\"_blank\">邮箱登录</a>\\n\\t\\t\\t\\t\\t\\t\\t<a href=\"mailto:nfcsysuyz@126.com\" target=\"_blank\" title=\"nfcsysuyz@126.com\" >院长信箱</a> -->\\n\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t<div class=\"lin-ser lin-serhide\">\\n\\t\\t\\t\\t\\t<div class=\"serbox\">\\n\\t\\t\\t\\t\\t\\t<form action=\"/cms/web/search/index.jsp\" method=\"get\" id=\"search_form\">\\n\\t\\t\\t\\t\\t\\t\\t<input name=\"siteID\" value=\"bbef6bf928be491d9180b44993df0ac9\" type =\"hidden\"> \\n\\t\\t\\t\\t\\t\\t\\t<input type=\"text\" name=\"query\" id=\"keyword\" 
placeholder=\"搜索\">\\n\\t\\t\\t\\t\\t\\t\\t<a href=\"javascript:;\" id=\"search_btn\">\\n\\t\\t\\t\\t\\t\\t\\t</a>\\n\\t\\t\\t\\t\\t\\t</form>\\t\\n\\t\\t\\t\\t\\t\\t<script type=\"text/javascript\">\\n\\t\\t\\t\\t\\t\\t\\t$(\"#search_btn\").click(function(){\\n\\t\\t\\t\\t\\t\\t\\t\\tvar keyword=$(\"#keyword\").val();\\n\\t\\t\\t\\t\\t\\t\\t\\tif(keyword==\\'\\'){\\n\\t\\t\\t\\t\\t\\t\\t\\t\\talert(\\'* 请输入搜索关键词 !\\');\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t$(\"#keyword\").focus();\\n\\t\\t\\t\\t\\t\\t\\t\\t\\treturn false;\\n\\t\\t\\t\\t\\t\\t\\t\\t}else{\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t$(\"#search_form\").submit();\\n\\t\\t\\t\\t\\t\\t\\t\\t}\\n\\t\\t\\t\\t\\t\\t\\t})\\n\\t\\t\\t\\t\\t\\t</script>\\n\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t</div>\\n\\t\\t\\t</div>\\n\\t\\t</div>\\n\\t</div>\\n\\t\\t\\t<!-- end 头部-->\\n\\t\\t\\t<!--导航条-->\\n\\t\\t\\t<div class=\"lin-navbar\">\\n\\t\\t\\t\\t<p class=\"navnav\">\\n\\t\\t\\t\\t\\t<span>\\n\\t\\t\\t\\t\\t</span>\\n\\t\\t\\t\\t\\t<span>\\n\\t\\t\\t\\t\\t</span>\\n\\t\\t\\t\\t\\t<span>\\n\\t\\t\\t\\t\\t</span>\\n\\t\\t\\t\\t</p>\\n\\t\\t\\t\\t<ul class=\"lin-nav clearfix\">\\n\\t\\t\\t\\t\\t<li class=\"lin-navli\"><a href=\"../index.htm\">首页</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../xxgk/index.htm\" target=\"_blank\">学校概况</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../xxgk/xxjj/index.htm\" target=\"_self\">学校简介</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../xxgk/xrld/index.htm\" 
target=\"_self\">现任领导</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../xxgk/xhxxxg/index.htm\" target=\"_self\">校徽  校训  校歌</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../xxgk/nfdsj/index.htm\" target=\"_self\">南方大事记</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../xxgk/xxxl/index.htm\" target=\"_self\">学校校历</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t\\t<a href=\"http://cpc.nfu.edu.cn/\" target=\"_self\">党建之窗</a>\\n\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../jgsz/index.htm\" target=\"_blank\">机构设置</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../jgsz/yxsz/index.htm\" target=\"_self\">院系设置</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../jgsz/gljg/index.htm\" target=\"_self\">管理机构</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../jgsz/cswyh/index.htm\" 
target=\"_self\">常设委员会</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../rcpy/index.htm\" target=\"_blank\">人才培养</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../rcpy/msjs/index.htm\" target=\"_self\">名师介绍</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../rcpy/bkjy/index.htm\" target=\"_self\">本科教育</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../rcpy/jxjy/index.htm\" target=\"_self\">继续教育</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../jxky/index.htm\" target=\"_blank\">教学科研</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://jw.nfu.edu.cn/\" target=\"_self\">教务部</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://kyb.nfu.edu.cn/\" 
target=\"_self\">科研部</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../jxky/kyjg/index.htm\" target=\"_self\">科研机构</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../zsjy/index.htm\" target=\"_blank\">招生就业</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://zsb.nfu.edu.cn/\" target=\"_self\">本科招生</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://jx.nfu.edu.cn/\" target=\"_self\">继续教育</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../zsjy/jyfw/index.htm\" target=\"_self\">就业服务</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../tsg/index.htm\" target=\"_blank\">图书馆</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://lib.nfu.edu.cn/\" 
target=\"_self\">图书馆</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://das.nfu.edu.cn/\" target=\"_self\">档案室</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../hzjl/index.htm\" target=\"_blank\">合作交流</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://gj.nfu.edu.cn/\" target=\"_self\">国际交流</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"http://gj.nfu.edu.cn/Home/Waishi/waishilist/class/1/p/1.html\" target=\"_self\">外事服务</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../rczp/index.htm\" target=\"_blank\">人才招聘</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a 
href=\"../rczp/jsxl/index.htm\" target=\"_self\">教师系列</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../rczp/glxl/index.htm\" target=\"_self\">管理系列</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li class=\"lin-navli\">\\n\\t\\t\\t\\t\\t\\t<a href=\"../zjnf/index.htm\" target=\"_blank\">走进南方</a>\\n\\t\\t\\t\\t\\t\\t<div class=\"lin-navdiv\">\\n\\t\\t\\t\\t\\t\\t\\t<div class=\"sonnav-bg\">\\n\\t\\t\\t\\t\\t\\t\\t\\t<ul class=\"nav-conul clearfix\" style=\"padding-left: 99px;\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../zjnf/tsnf/index.htm\" target=\"_self\">图说南方</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../zjnf/shfw/index.htm\" target=\"_self\">生活服务</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../zjnf/ylfw/index.htm\" target=\"_self\">医疗服务</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../zjnf/xb/index.htm\" target=\"_self\">校报</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li><a href=\"../zjnf/jtzy/index.htm\" target=\"_self\">交通指引</a></li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t</div>\\n\\t\\t\\t<div 
class=\"lin-navbg\"></div>\\n\\n\\t<!--content-->\\n\\t<div class=\"lin-content\">\\n\\t\\t<div class=\"lin-neiye clearfix\">\\n\\t\\t\\t\\n<div class=\"list_left clearfix\">\\n\\t<div class=\"list_title\">学校要闻</div>\\n\\t<div class=\"list_con\">\\n\\t  <p class=\"nav-ny\">\\n\\t   <span> </span>\\n\\t   <span></span>\\n\\t   <span></span>\\n\\t\\t</p>\\n\\t\\t<ul class=\"list_ul\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"index.htm\" class=\"current_in\">学校要闻<span class=\"yuan\"></span></a>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t  <ul class=\"in_ul\">\\n\\t\\t\\t\\t\\t  </ul>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"../xydt/index.htm\">校园动态<span class=\"yuan\"></span></a>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t  <ul class=\"in_ul\">\\n\\t\\t\\t\\t\\t  </ul>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"../tzgg/index.htm\">通知公告<span class=\"yuan\"></span></a>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t  <ul class=\"in_ul\">\\n\\t\\t\\t\\t\\t  </ul>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"../ztb/index.htm\">招投标<span class=\"yuan\"></span></a>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t  <ul class=\"in_ul\">\\n\\t\\t\\t\\t\\t  </ul>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"../gjdt/index.htm\">高教动态<span class=\"yuan\"></span></a>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t  <ul 
class=\"in_ul\">\\n\\t\\t\\t\\t\\t  </ul>\\n\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t</div>\\n</div>\\t\\t\\t<div class=\"list_right\">\\n\\t\\t\\t\\t<div class=\"fan clearfix\">\\n\\t\\t\\t\\t\\t<span class=\"fan_title\">学校要闻</span>\\n\\t\\t\\t\\t\\t<span class=\"fan_right\">您当前位置是：                 <a href=\"../index.htm\">网站首页</a> > \\n        <font>学校要闻</font>\\n\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t<div class=\"ny_content\">\\n\\t\\t\\t\\t\\t<ul class=\"list-ul\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-04-09</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"5b71d46d3b114859ae92f7535a7d60c9.htm\" title=\"快&#xff01;来为我校大学生国旗护卫队参赛点赞&#xff01;\">            快&#xff01;来为我校大学生国旗护卫队参赛点赞&#xff01; \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-04-02</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"f9bcd8092b494a04becfaf48b3138e20.htm\" title=\"专注当下&#xff0c;冲刺高考&#xff0c;奋斗出最美的青春\">            专注当下&#xff0c;冲刺高考&#xff0c;奋斗出最美的青春 \\n  
  </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-04-02</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"48b0929919ec4d2d9a2cdc278fc884ea.htm\" title=\"我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会\">            我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-04-02</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"0d7bd841484a42a69d241e79365b6290.htm\" title=\"我校承办首届 “新时代从商培养工程”\">            我校承办首届 “新时代从商培养工程” \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-31</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"debb2f222e024cbda5d2644acb6c552c.htm\" title=\"广东工业大学华立学院来访我校\">            广东工业大学华立学院来访我校 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-31</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"e5378134dbaf4b7b88d3003f1cd99e59.htm\" title=\"“疫情下的中国、美国以及中美关系”高层论坛暨广州南方学院“美国研究中心”成立五周年纪念研讨会\">            “疫情下的中国、美国以及中美关系”高层论坛暨广州南方学院“美国研究中心”成立五周年纪念研讨... 
\\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-29</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"7c865b16b203467ab6ddf5569f73e5c1.htm\" title=\"“思想政治理论第一课”上，学校党委书记、校长讲了这些！\">            “思想政治理论第一课”上，学校党委书记、校长讲了这些！ \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-29</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"28b0ad0eee8149e6b7f4ae65395910ff.htm\" title=\"学生报道|传承红色基因&#xff0c;讲好党员故事&#xff1a;我校举办身边优秀党员事迹分享会\">            学生报道|传承红色基因&#xff0c;讲好党员故事&#xff1a;我校举办身边优秀党员事迹... \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-26</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"c48c33c8f744430eb9417b800a8b2e3f.htm\" title=\"权威发布&#xff1a;我校名列2021年中国民办高校第四\">            权威发布&#xff1a;我校名列2021年中国民办高校第四 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-26</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"395b8e2ba5df47c59d080d50d1113be1.htm\" title=\"喜讯：电气学院学子在国际顶尖期刊发表学术论文\">            喜讯：电气学院学子在国际顶尖期刊发表学术论文 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-19</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"59bda093ced440f78c638ade40ab0b93.htm\" 
title=\"我校召开行政人员专题培训暨2021年春季学期第一次办公室工作例会\">            我校召开行政人员专题培训暨2021年春季学期第一次办公室工作例会 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-15</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"1af5590575b74762b624f048b5ad79f4.htm\" title=\"我校开展2021年春季学期教学检查工作\">            我校开展2021年春季学期教学检查工作 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-15</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"4e32521de0da4d21979182e1b114a964.htm\" title=\"媒体报道我校入选国家级一流本科专业建设点情况\">            媒体报道我校入选国家级一流本科专业建设点情况 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-12</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"23279088871e4b89b8eab2e7fbc77b17.htm\" title=\"我校举行转设更名挂牌仪式\">            我校举行转设更名挂牌仪式 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-12</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"a5de3999469447b488857144f58f8c27.htm\" title=\"广东省教育厅公布国家级一流专业建设点数量排名：我校位列广东高校第29，同类院校第1\">            广东省教育厅公布国家级一流专业建设点数量排名：我校位列广东高校第29，同类院校第1 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-12</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"6273fd9185b54b20a0af15b9878f1d2c.htm\" 
title=\"我校政商研究院学生胡志翔在国际期刊发表论文\">            我校政商研究院学生胡志翔在国际期刊发表论文 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-12</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"a1f9ac1d39704e4d8136478ec97e3635.htm\" title=\"我校举行大一学生升旗仪式暨晨跑之星颁奖大会\">            我校举行大一学生升旗仪式暨晨跑之星颁奖大会 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-11</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"c438a1ec6db5446faf76617654b5ca55.htm\" title=\"我校召开党史学习教育动员大会\">            我校召开党史学习教育动员大会 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-09</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"f28729353ff749b9b170825ffe346949.htm\" title=\"我校组织参加全省教育系统党史学习教育动员部署会视频会议\">            我校组织参加全省教育系统党史学习教育动员部署会视频会议 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<li>\\n\\t\\t\\t\\t\\t\\t\\t\\t<font class=\"right-more\">2021-03-08</font>\\n\\t\\t\\t\\t\\t\\t\\t\\t<div class=\"news_title\">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<a href=\"f25bba2bf25d43399a88598c769bb779.htm\" title=\"喜讯：我校计算机科学与技术专业获IEET认证\">            喜讯：我校计算机科学与技术专业获IEET认证 \\n    </a>\\n\\t\\t\\t\\t\\t\\t\\t\\t</div>\\n\\t\\t\\t\\t\\t\\t\\t</li>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</ul>\\n\\t\\t\\t\\t\\t\\t<div style=\"clear: both;\"></div>\\n\\t\\t\\t\\t\\t\\t\\n\\t<div class=\"pages\" align=\"center\">\\n  <div>\\n            \\n    \\t\\t    \\t\\t        \\t\\t\\t<a class=\"next\" href=\"index1.htm\">&gt;&gt;</a>\\n\\t\\t    \\t\\t  
</div>\\n</div>\\n    </div></div>\\n\\t\\t\\t\\t</div>\\n\\t\\t\\t</div>\\n\\t\\t</div>\\n\\t</div>\\n\\t\\t<!-- end 内容区域-->\\t\\n\\t\\t\\n\\t\\t<!--底部-->\\n\\t\\t\\n<div class=\"lin-footer\">\\n\\t<div class=\"lin-fer clearfix\">\\n\\t\\t<div class=\"ferleft\">\\n\\t\\t\\t<ul class=\"fer-ul clearfix\">\\n\\t\\t\\t<li class=\"fer-li\">\\n\\t\\t\\t<a href=\"http://www.moe.gov.cn/\" target=\"_blank\" title=\"教育部\">            教育部 \\n    </a>\\n\\t\\t</li>\\n\\t\\t\\t<li class=\"fer-li\">\\n\\t\\t\\t<a href=\"http://edu.gd.gov.cn/\" target=\"_blank\" title=\"广东省教育厅\">            广东省教育厅 \\n    </a>\\n\\t\\t</li>\\n\\t\\t\\t<li class=\"fer-li\">\\n\\t\\t\\t<a href=\"http://www.sysu.edu.cn/2012/cn/index.htm\" target=\"_blank\" title=\"中山大学\">            中山大学 \\n    </a>\\n\\t\\t</li>\\n\\t</ul>\\n<ul class=\"fer-ul clearfix\">\\n\\t\\t\\t<li class=\"fer-li\">\\n\\t\\t\\t<a href=\"http://www.gz.gov.cn/\" target=\"_blank\" title=\"广州市政府\">            广州市政府 \\n    </a>\\n\\t\\t</li>\\n\\t\\t\\t<li class=\"fer-li\">\\n\\t\\t\\t<a href=\"http://www.gdpr.com/\" target=\"_blank\" title=\"珠江投资\">            珠江投资 \\n    </a>\\n\\t\\t</li>\\n\\t\\t\\t<li class=\"fer-li\">\\n\\t\\t\\t<a href=\"http://www.gdmbjy.cn/\" target=\"_blank\" title=\"广东民办教育网\">            广东民办教育网 \\n    </a>\\n\\t\\t</li>\\n\\t</ul>\\n<ul class=\"fer-ul clearfix\">\\n\\t\\t\\t<li class=\"fer-li\">\\n\\t\\t\\t<a href=\"https://www.cnki.net/\" target=\"_blank\" title=\"中国知网\">            中国知网 \\n    </a>\\n\\t\\t</li>\\n\\t</ul>\\n\\t\\t</div>\\n\\t\\t<div class=\"fercen\">\\n\\t\\t\\t<div class=\"fer-er\">\\n\\t\\t\\t\\t<img src=\"../images/erweima1.jpg\">\\n\\t\\t\\t</div>\\n\\t\\t\\t<div class=\"fer-er\">\\n\\t\\t\\t\\t<img src=\"../images/erweima2.jpg\">\\n\\t\\t\\t</div>\\n\\t\\t</div>\\n\\t\\t<div class=\"ferright\">\\n\\t\\t\\t<div>\\n\\t\\t\\t\\t<p>\\n\\t\\t\\t\\t\\t<span>地址：广州市从化区温泉大道882号广州南方学院</span>\\n\\t\\t\\t\\t\\t<span>邮编：510970</span>\\n\\t\\t\\t\\t</p>\\n\\t\\t\\t</div>\\n\\t\\t\\t<div 
class=\"addleft\">\\n\\t\\t\\t\\t<p>联系电话：020-61787368</p>\\n\\t\\t\\t\\t<p>版权所有 ©  广州南方学院</p>\\n\\t\\t\\t\\t<p>技术支持：<a href=\"https://www.gpowersoft.com/\" target=\"_blank\">Gpower 通元软件</a>\\n\\t\\t\\t\\t</p>\\n\\t\\t\\t</div>\\n\\t\\t\\t<div class=\"addright\">\\n\\t\\t\\t\\t<p>招生咨询：020-61787331</p> \\n\\t\\t\\t\\t<p>\\n\\t\\t\\t\\t\\t<span class=\"add-spante\">\\n\\t\\t\\t\\t\\t\\t<a target=\"_blank\" href=\"https://beian.miit.gov.cn/\">粤ICP备11077779号</a>\\n\\t\\t\\t\\t\\t</span> \\n\\t\\t\\t\\t\\t<span class=\"add-spante\">\\n\\t\\t\\t\\t\\t\\t<a href=\"http://tycms.nfu.edu.cn/cms/main\" target=\"_blank\">网站管理</a>&nbsp;&nbsp;\\n\\t\\t\\t\\t\\t\\t<a href=\"http://old.nfu.edu.cn/\" target=\"_blank\">旧站入口</a>\\n\\t\\t\\t\\t\\t</span>\\n\\t\\t\\t\\t</p>\\n\\t\\t\\t\\t\\n\\t\\t\\t</div>\\n\\t\\t</div>\\n\\t\\t\\n\\t\\t<div align=\"center\">\\n\\t\\t\\t<a target=\"_blank\" href=\"http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=44011702000081\" style=\"display:inline-block;text-decoration:none;height:20px;line-height:20px;\">\\n\\t\\t\\t\\t<img src=\"../images/icp.png\" style=\"float:left;\">\\n\\t\\t\\t\\t<p style=\"float:left;height:20px;line-height:20px;margin: 0px 0px 0px 5px; color:#ffffff;\">粤公网安备 44011702000081号</p>\\n\\t\\t\\t</a>\\n\\t\\t</div>\\n\\t</div>\\n</div>\\t\\t<!-- end 底部-->\\n\\n\\t\\t<script type=\"text/javascript\" language=\"javascript\">\\n\\t\\t\\t\\n    //加入收藏\\n    \\n    function AddFavorite(sURL, sTitle) {\\n    \\t\\n    \\tsURL = encodeURI(sURL); \\n    \\ttry{   \\n    \\t\\t\\n    \\t\\twindow.external.addFavorite(sURL, sTitle);   \\n    \\t\\t\\n    \\t}catch(e) {   \\n    \\t\\t\\n    \\t\\ttry{   \\n    \\t\\t\\t\\n    \\t\\t\\twindow.sidebar.addPanel(sTitle, sURL, \"\");   \\n    \\t\\t\\t\\n    \\t\\t}catch (e) {   \\n    \\t\\t\\t\\n    \\t\\t\\talert(\"加入收藏失败，请使用Ctrl+D进行添加,或手动在浏览器里进行设置.\");\\n    \\t\\t\\t\\n    \\t\\t}   \\n    \\t\\t\\n    \\t}\\n    \\t\\n    }\\n    \\n    //设为首页\\n    \\n    function 
SetHome(url){\\n    \\t\\n    \\tif (document.all) {\\n    \\t\\t\\n    \\t\\tdocument.body.style.behavior=\\'url(#default#homepage)\\';\\n    \\t\\t\\n    \\t\\tdocument.body.setHomePage(url);\\n    \\t\\t\\n    \\t}else{\\n    \\t\\t\\n    \\t\\talert(\"您好,您的浏览器不支持自动设置页面为首页功能,请您手动在浏览器里设置该页面为首页!\");\\n    \\t\\t\\n    \\t}\\n    \\t\\n    }\\n    \\n</script>\\n\\n\\n\\n</body></html><script type=\\'text/javascript\\' src=\\'../g_style/g_list.js\\'></script>\\n</body></html>'"
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the saved listing page back from disk (UTF-8) into one string.\n",
    "with open(\"html_out/_nfu_xxyw_文学与传媒学院.html\", mode=\"r\", encoding=\"utf8\") as html_file:\n",
    "    html_load = html_file.read()\n",
    "html_load"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## soup_parse 解析：str 的 html 文本 => Element（html 元素对象）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Element html at 0x285f8eb59a8>"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse: turn the raw HTML string into an element tree so that the cells\n",
    "# below can query it with XPath.\n",
    "parsed = requests_html.soup_parse(html_load)\n",
    "parsed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 解析和重塑链接（内容链接）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='https', netloc='www.nfu.edu.cn', path='/xxyw/index.htm', params='', query='', fragment='')"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split the URL of the fetched listing page into its components\n",
    "# (scheme, netloc, path, ...); they are reused below to rebuild article links.\n",
    "base_url = r.url\n",
    "nfu_urlparse = urllib.parse.urlparse(base_url)\n",
    "nfu_urlparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae92f7535a7d60c9.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04becfaf48b3138e20.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a2cdc278fc884ea.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d241e79365b6290.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/debb2f222e024cbda5d2644acb6c552c.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/e5378134dbaf4b7b88d3003f1cd99e59.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/7c865b16b203467ab6ddf5569f73e5c1.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/28b0ad0eee8149e6b7f4ae65395910ff.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/c48c33c8f744430eb9417b800a8b2e3f.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/395b8e2ba5df47c59d080d50d1113be1.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/59bda093ced440f78c638ade40ab0b93.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/1af5590575b74762b624f048b5ad79f4.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/4e32521de0da4d21979182e1b114a964.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/23279088871e4b89b8eab2e7fbc77b17.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/a5de3999469447b488857144f58f8c27.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/6273fd9185b54b20a0af15b9878f1d2c.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/a1f9ac1d39704e4d8136478ec97e3635.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/c438a1ec6db5446faf76617654b5ca55.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/f28729353ff749b9b170825ffe346949.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/f25bba2bf25d43399a88598c769bb779.htm']"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rebuild absolute article links.\n",
    "# urljoin resolves every href against the listing page URL: plain relative\n",
    "# hrefs give the same result as the former manual concatenation, while\n",
    "# absolute or '../' hrefs are no longer glued onto '/xxyw/' and broken.\n",
    "list_URL = [\n",
    "    urllib.parse.urljoin(nfu_urlparse.geturl(), detail_url)\n",
    "    for detail_url in parsed.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "]\n",
    "list_URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>快！来为我校大学生国旗护卫队参赛点赞！</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>专注当下，冲刺高考，奋斗出最美的青春</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04be...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>我校承办首届 “新时代从商培养工程”</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>广东工业大学华立学院来访我校</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/debb2f222e024cbda5...</td>\n",
       "      <td>2021-03-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>“疫情下的中国、美国以及中美关系”高层论坛暨广州南方学院“美国研究中心”成立五周年纪念研讨会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/e5378134dbaf4b7b88...</td>\n",
       "      <td>2021-03-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>“思想政治理论第一课”上，学校党委书记、校长讲了这些！</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/7c865b16b203467ab6...</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>学生报道|传承红色基因，讲好党员故事：我校举办身边优秀党员事迹分享会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/28b0ad0eee8149e6b7...</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>权威发布：我校名列2021年中国民办高校第四</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/c48c33c8f744430eb9...</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>喜讯：电气学院学子在国际顶尖期刊发表学术论文</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/395b8e2ba5df47c59d...</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>我校召开行政人员专题培训暨2021年春季学期第一次办公室工作例会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/59bda093ced440f78c...</td>\n",
       "      <td>2021-03-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>我校开展2021年春季学期教学检查工作</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/1af5590575b74762b6...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>媒体报道我校入选国家级一流本科专业建设点情况</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/4e32521de0da4d2197...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>我校举行转设更名挂牌仪式</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/23279088871e4b89b8...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>广东省教育厅公布国家级一流专业建设点数量排名：我校位列广东高校第29，同类院校第1</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/a5de3999469447b488...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>我校政商研究院学生胡志翔在国际期刊发表论文</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/6273fd9185b54b20a0...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>我校举行大一学生升旗仪式暨晨跑之星颁奖大会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/a1f9ac1d39704e4d81...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>我校召开党史学习教育动员大会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/c438a1ec6db5446faf...</td>\n",
       "      <td>2021-03-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>我校组织参加全省教育系统党史学习教育动员部署会视频会议</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f28729353ff749b9b1...</td>\n",
       "      <td>2021-03-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>喜讯：我校计算机科学与技术专业获IEET认证</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f25bba2bf25d43399a...</td>\n",
       "      <td>2021-03-08</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                标题  \\\n",
       "0                              快！来为我校大学生国旗护卫队参赛点赞！   \n",
       "1                               专注当下，冲刺高考，奋斗出最美的青春   \n",
       "2                  我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会   \n",
       "3                               我校承办首届 “新时代从商培养工程”   \n",
       "4                                   广东工业大学华立学院来访我校   \n",
       "5   “疫情下的中国、美国以及中美关系”高层论坛暨广州南方学院“美国研究中心”成立五周年纪念研讨会   \n",
       "6                      “思想政治理论第一课”上，学校党委书记、校长讲了这些！   \n",
       "7               学生报道|传承红色基因，讲好党员故事：我校举办身边优秀党员事迹分享会   \n",
       "8                           权威发布：我校名列2021年中国民办高校第四   \n",
       "9                           喜讯：电气学院学子在国际顶尖期刊发表学术论文   \n",
       "10                我校召开行政人员专题培训暨2021年春季学期第一次办公室工作例会   \n",
       "11                             我校开展2021年春季学期教学检查工作   \n",
       "12                          媒体报道我校入选国家级一流本科专业建设点情况   \n",
       "13                                    我校举行转设更名挂牌仪式   \n",
       "14       广东省教育厅公布国家级一流专业建设点数量排名：我校位列广东高校第29，同类院校第1   \n",
       "15                           我校政商研究院学生胡志翔在国际期刊发表论文   \n",
       "16                           我校举行大一学生升旗仪式暨晨跑之星颁奖大会   \n",
       "17                                  我校召开党史学习教育动员大会   \n",
       "18                     我校组织参加全省教育系统党史学习教育动员部署会视频会议   \n",
       "19                          喜讯：我校计算机科学与技术专业获IEET认证   \n",
       "\n",
       "                                                   链结          日期  \n",
       "0   https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae...  2021-04-09  \n",
       "1   https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04be...  2021-04-02  \n",
       "2   https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a...  2021-04-02  \n",
       "3   https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d...  2021-04-02  \n",
       "4   https://www.nfu.edu.cn/xxyw/debb2f222e024cbda5...  2021-03-31  \n",
       "5   https://www.nfu.edu.cn/xxyw/e5378134dbaf4b7b88...  2021-03-31  \n",
       "6   https://www.nfu.edu.cn/xxyw/7c865b16b203467ab6...  2021-03-29  \n",
       "7   https://www.nfu.edu.cn/xxyw/28b0ad0eee8149e6b7...  2021-03-29  \n",
       "8   https://www.nfu.edu.cn/xxyw/c48c33c8f744430eb9...  2021-03-26  \n",
       "9   https://www.nfu.edu.cn/xxyw/395b8e2ba5df47c59d...  2021-03-26  \n",
       "10  https://www.nfu.edu.cn/xxyw/59bda093ced440f78c...  2021-03-19  \n",
       "11  https://www.nfu.edu.cn/xxyw/1af5590575b74762b6...  2021-03-15  \n",
       "12  https://www.nfu.edu.cn/xxyw/4e32521de0da4d2197...  2021-03-15  \n",
       "13  https://www.nfu.edu.cn/xxyw/23279088871e4b89b8...  2021-03-12  \n",
       "14  https://www.nfu.edu.cn/xxyw/a5de3999469447b488...  2021-03-12  \n",
       "15  https://www.nfu.edu.cn/xxyw/6273fd9185b54b20a0...  2021-03-12  \n",
       "16  https://www.nfu.edu.cn/xxyw/a1f9ac1d39704e4d81...  2021-03-12  \n",
       "17  https://www.nfu.edu.cn/xxyw/c438a1ec6db5446faf...  2021-03-11  \n",
       "18  https://www.nfu.edu.cn/xxyw/f28729353ff749b9b1...  2021-03-09  \n",
       "19  https://www.nfu.edu.cn/xxyw/f25bba2bf25d43399a...  2021-03-08  "
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Output, step B-D-1: build the pd.DataFrame (covered in the pandas course).\n",
    "# One column per scraped field; the three lists are aligned row by row.\n",
    "news_titles = parsed.xpath('//div[@class=\"news_title\"]/a/@title')\n",
    "news_dates = parsed.xpath('//font[@class=\"right-more\"]/text()')\n",
    "df = pd.DataFrame({\"标题\": news_titles, \"链结\": list_URL, \"日期\": news_dates})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [],
   "source": [
    "# B-D-2: export the DataFrame to an Excel workbook on a sheet named \"检索结果\"\n",
    "# (covered in the pandas course).\n",
    "# NOTE(review): nothing in this cell creates data_out/ - presumably it must exist beforehand.\n",
    "df.to_excel(\"data_out/nfu_xxyw_文学与传媒学院.xlsx\", sheet_name=\"检索结果\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 本周内容1:如何实现翻页？\n",
    "\n",
    "* 1. 翻页链接有何区别？\n",
    "* 2. 有多少页？\n",
    "* 3. 实现翻页的url队列\n",
    "* 4. 批量存html文件\n",
    "* 5. 批量存excel文件"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 翻页链接有何区别？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 第一页\n",
    "# base_url_01 = r.url\n",
    "# base_url_01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "# urllib.parse.urlsplit(base_url_01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df = pd.DataFrame(urllib.parse.urlsplit(base_url_01)).rename({0:\"第一页\"},axis=1)\n",
    "# df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 第二页\n",
    "# base_url_02 = session.get('https://www.nfu.edu.cn/mtnf/index2.htm').url\n",
    "# base_url_02"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df['第二页'] = urllib.parse.urlsplit(base_url_02)\n",
    "# df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ！！学校要闻"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 有多少页？\n",
    "* 第三页...  第n页？多少页？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "91\n"
     ]
    }
   ],
   "source": [
    "# Probe index1.htm, index2.htm, ... and stop at the first page number that\n",
    "# does not answer with HTTP 200; that number is printed.\n",
    "for i in range(1, 100):\n",
    "    r = session.get('https://www.nfu.edu.cn/xxyw/index' + str(i) + '.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break\n",
    "else:\n",
    "    # Every probed page answered 200, so the last page lies beyond this range.\n",
    "    print('no missing page found below index100; widen the range')\n",
    "# The recorded run printed 91: index1..index90 exist, which together with\n",
    "# index.htm makes 91 listing pages (not 19)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 实现翻页的url队列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xxyw/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index84.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index85.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index86.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index87.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index88.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index89.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index90.htm']"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group = ['https://www.nfu.edu.cn/xxyw/index'+str(i)+'.htm' for i in range(1,91)]\n",
    "url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first listing page carries no number; put it at the front of the queue.\n",
    "# Guarded so that re-running this cell does not insert a duplicate entry.\n",
    "if url_group[:1] != ['https://www.nfu.edu.cn/xxyw/index.htm']:\n",
    "    url_group.insert(0, 'https://www.nfu.edu.cn/xxyw/index.htm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xxyw/index.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index84.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index85.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index86.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index87.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index88.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index89.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index90.htm']"
      ]
     },
     "execution_count": 163,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/xxyw/index.htm'"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "urllib.parse.urlparse(url_group[0]).path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 批量存html文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Download every listing page and mirror it under html_out/, using the URL\n",
    "# path as the relative file path (e.g. html_out/xxyw/index1.htm).\n",
    "for url in url_group:\n",
    "    r = session.get(url)\n",
    "    path = urllib.parse.urlparse(url).path\n",
    "    # The URL path starts with '/'; strip it so the join does not produce\n",
    "    # 'html_out//xxyw/...'.\n",
    "    target = os.path.join('html_out', path.lstrip('/'))\n",
    "    # Create the target folder on first use instead of assuming it exists.\n",
    "    os.makedirs(os.path.dirname(target), exist_ok=True)\n",
    "    with open(target, encoding=\"utf8\", mode=\"w\") as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 批量存excel文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [],
   "source": [
    "# xpath 准备：\n",
    "dict_xpath = {\n",
    "    '链接_xpath':'//div[@class=\"news_title\"]/a/@href',\n",
    "    '标题_xpath':'//div[@class=\"news_title\"]/a/@title',\n",
    "    '日期_xpath':'//font[@class=\"right-more\"]/text()'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pages_content_url(parsed):\n",
    "    \"\"\"Return the absolute article URLs found on one parsed listing page.\n",
    "\n",
    "    Args:\n",
    "        parsed: parsed listing page; it must offer ``xpath(expr)`` returning\n",
    "            the href strings matched by ``dict_xpath['链接_xpath']``.\n",
    "\n",
    "    Returns:\n",
    "        list of str: one absolute URL per href, in the order xpath yields them.\n",
    "\n",
    "    Relies on the notebook globals ``nfu_urlparse`` (parsed URL of the listing\n",
    "    page) and ``dict_xpath``.\n",
    "    \"\"\"\n",
    "    # urljoin resolves each href against the listing URL: plain relative hrefs\n",
    "    # give the same result as the former manual concatenation, while absolute\n",
    "    # or '../' hrefs are no longer glued onto '/xxyw/' and broken.\n",
    "    listing_url = nfu_urlparse.geturl()\n",
    "    return [urllib.parse.urljoin(listing_url, detail_url)\n",
    "            for detail_url in parsed.xpath(dict_xpath['链接_xpath'])]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['index.htm', 'index1.htm', 'index10.htm', 'index11.htm', 'index12.htm', 'index13.htm', 'index14.htm', 'index15.htm', 'index16.htm', 'index17.htm', 'index18.htm', 'index19.htm', 'index2.htm', 'index20.htm', 'index21.htm', 'index22.htm', 'index23.htm', 'index24.htm', 'index25.htm', 'index26.htm', 'index27.htm', 'index28.htm', 'index29.htm', 'index3.htm', 'index30.htm', 'index31.htm', 'index32.htm', 'index33.htm', 'index34.htm', 'index35.htm', 'index36.htm', 'index37.htm', 'index38.htm', 'index39.htm', 'index4.htm', 'index40.htm', 'index41.htm', 'index42.htm', 'index43.htm', 'index44.htm', 'index45.htm', 'index46.htm', 'index47.htm', 'index48.htm', 'index49.htm', 'index5.htm', 'index50.htm', 'index51.htm', 'index52.htm', 'index53.htm', 'index54.htm', 'index55.htm', 'index56.htm', 'index57.htm', 'index58.htm', 'index59.htm', 'index6.htm', 'index60.htm', 'index61.htm', 'index62.htm', 'index63.htm', 'index64.htm', 'index65.htm', 'index66.htm', 'index67.htm', 'index68.htm', 'index69.htm', 'index7.htm', 'index70.htm', 'index71.htm', 'index72.htm', 'index73.htm', 'index74.htm', 'index75.htm', 'index76.htm', 'index77.htm', 'index78.htm', 'index79.htm', 'index8.htm', 'index80.htm', 'index81.htm', 'index82.htm', 'index83.htm', 'index84.htm', 'index85.htm', 'index86.htm', 'index87.htm', 'index88.htm', 'index89.htm', 'index9.htm', 'index90.htm']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>快！来为我校大学生国旗护卫队参赛点赞！</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>专注当下，冲刺高考，奋斗出最美的青春</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04be...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>我校承办首届 “新时代从商培养工程”</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>广东工业大学华立学院来访我校</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/debb2f222e024cbda5...</td>\n",
       "      <td>2021-03-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>“疫情下的中国、美国以及中美关系”高层论坛暨广州南方学院“美国研究中心”成立五周年纪念研讨会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/e5378134dbaf4b7b88...</td>\n",
       "      <td>2021-03-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>“思想政治理论第一课”上，学校党委书记、校长讲了这些！</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/7c865b16b203467ab6...</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>学生报道|传承红色基因，讲好党员故事：我校举办身边优秀党员事迹分享会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/28b0ad0eee8149e6b7...</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>喜讯：电气学院学子在国际顶尖期刊发表学术论文</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/395b8e2ba5df47c59d...</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>权威发布：我校名列2021年中国民办高校第四</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/c48c33c8f744430eb9...</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>我校召开行政人员专题培训暨2021年春季学期第一次办公室工作例会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/59bda093ced440f78c...</td>\n",
       "      <td>2021-03-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>我校开展2021年春季学期教学检查工作</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/1af5590575b74762b6...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>媒体报道我校入选国家级一流本科专业建设点情况</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/4e32521de0da4d2197...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>我校举行转设更名挂牌仪式</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/23279088871e4b89b8...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>广东省教育厅公布国家级一流专业建设点数量排名：我校位列广东高校第29，同类院校第1</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/a5de3999469447b488...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>我校政商研究院学生胡志翔在国际期刊发表论文</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/6273fd9185b54b20a0...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>我校举行大一学生升旗仪式暨晨跑之星颁奖大会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/a1f9ac1d39704e4d81...</td>\n",
       "      <td>2021-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>我校召开党史学习教育动员大会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/c438a1ec6db5446faf...</td>\n",
       "      <td>2021-03-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>我校组织参加全省教育系统党史学习教育动员部署会视频会议</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f28729353ff749b9b1...</td>\n",
       "      <td>2021-03-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0</td>\n",
       "      <td>我校师生深入学习习近平总书记在政协医药卫生界教育界联组会上的重要讲话精神</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/d6a77d315f9844618e...</td>\n",
       "      <td>2021-03-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>喜讯：我校计算机科学与技术专业获IEET认证</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f25bba2bf25d43399a...</td>\n",
       "      <td>2021-03-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "      <td>广东省同类院校唯一：连续两年获批国家级一流专业建设点</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/96c744528cdf40fc83...</td>\n",
       "      <td>2021-03-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2</td>\n",
       "      <td>媒体刊登我校督学顾问黄天骥教授专访</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/3190c890760d4890b5...</td>\n",
       "      <td>2021-03-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>3</td>\n",
       "      <td>我校文传学院学生曾竞在广东学习平台“强国征文”获奖</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/215f9d4a3fd6445fbf...</td>\n",
       "      <td>2021-03-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4</td>\n",
       "      <td>校领导走访慰问教职员工</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/7a20f30cfc484b05a2...</td>\n",
       "      <td>2021-02-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5</td>\n",
       "      <td>铺好师生返校安全之路，迈好新学期开学第一步</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/cb64557433d74cb0b5...</td>\n",
       "      <td>2021-02-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>6</td>\n",
       "      <td>护理与健康学院名誉院长方海云主任护师获全国“敬老爱老助老模范人物”荣誉称号</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/a7fbc0199b7c43eab0...</td>\n",
       "      <td>2021-02-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>7</td>\n",
       "      <td>2021年春季学期学生返校指引</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/eeb15a5fca744e99b7...</td>\n",
       "      <td>2021-02-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>8</td>\n",
       "      <td>我校召开2020年度领导干部民主生活会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/346772634f7843f6b8...</td>\n",
       "      <td>2021-01-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>9</td>\n",
       "      <td>我校2020年终总结表彰大会隆重举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f989dee2d6a34995a6...</td>\n",
       "      <td>2021-01-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1752</th>\n",
       "      <td>12</td>\n",
       "      <td>杨长学教授畅谈就业观，助我院学子职达成功</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/909334578e6e46c88a...</td>\n",
       "      <td>2013-12-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1754</th>\n",
       "      <td>14</td>\n",
       "      <td>我院学子“北银消金杯”电子商务与互联网金融大赛获全国二等奖1项三等奖2项</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f6a8f9664a6e42d5a2...</td>\n",
       "      <td>2013-12-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1753</th>\n",
       "      <td>13</td>\n",
       "      <td>喜报：我院在2012年广东省高校思政理论课建设评估中被评为优秀</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5e1ba4ce5088451fb8...</td>\n",
       "      <td>2013-12-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1755</th>\n",
       "      <td>15</td>\n",
       "      <td>广州科技职业技术学院董事胡日章教授一行四人来访我院</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/7ca1a9b0d7964e9eb9...</td>\n",
       "      <td>2013-12-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1756</th>\n",
       "      <td>16</td>\n",
       "      <td>火热聘场 铺路搭桥——从化市2014届高校毕业生（冬季）专场招聘会举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/8c291aa5fcac48eeb0...</td>\n",
       "      <td>2013-12-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1757</th>\n",
       "      <td>17</td>\n",
       "      <td>第八届广东大中专学生校园文化艺节之舞蹈大赛在我院举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/c26c0af97acf48038b...</td>\n",
       "      <td>2013-12-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1758</th>\n",
       "      <td>18</td>\n",
       "      <td>我院喜获2013年新华教育论坛“大国教育之声”中国品牌影响力独立学院</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/4f0410882be943b4a5...</td>\n",
       "      <td>2013-12-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1759</th>\n",
       "      <td>19</td>\n",
       "      <td>我院学子在全国啦啦操联赛暨“中国啦啦之星”争霸赛总决赛中夺得亚军</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/4933369c21954a8892...</td>\n",
       "      <td>2013-12-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1760</th>\n",
       "      <td>0</td>\n",
       "      <td>多管齐下，助学子更好就业——我院2014届毕业生就业工作扎实推进</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/fb1c14ff01af4e2588...</td>\n",
       "      <td>2013-12-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1761</th>\n",
       "      <td>1</td>\n",
       "      <td>英国知山大学副校长Seth Crofts一行4人来我院洽谈合作事宜</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5f876c948f6d4adabf...</td>\n",
       "      <td>2013-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762</th>\n",
       "      <td>2</td>\n",
       "      <td>夜话生活，漫谈大学——“夜话大学”系列讲座开讲</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/82062b268577401690...</td>\n",
       "      <td>2013-11-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1764</th>\n",
       "      <td>4</td>\n",
       "      <td>喜讯：我院学子荣获2013年One Show中华青年创意竞赛视频类金奖</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5e1e30b982e246e2bc...</td>\n",
       "      <td>2013-11-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1765</th>\n",
       "      <td>5</td>\n",
       "      <td>喜报：我院学子在广东省第四届高校大学生心理剧大赛中喜获佳绩</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/3719dbce00d7452d9f...</td>\n",
       "      <td>2013-11-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1766</th>\n",
       "      <td>6</td>\n",
       "      <td>中南财经政法大学武汉学院杨洁主任一行来我院调研</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/4e827ce7d96549748b...</td>\n",
       "      <td>2013-11-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1763</th>\n",
       "      <td>3</td>\n",
       "      <td>我院辅导员赴中山大学随岗学习工作正式启动</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/23f8d4442fc94e77a9...</td>\n",
       "      <td>2013-11-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1767</th>\n",
       "      <td>7</td>\n",
       "      <td>第三届“南沙湿地杯”羊城大学生辩论赛初赛在我院举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/ce13d164907d4803a8...</td>\n",
       "      <td>2013-11-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1768</th>\n",
       "      <td>8</td>\n",
       "      <td>大爱无疆，台湾慈济基金会走进中大南方</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/6d06c7717bfd4a2ea0...</td>\n",
       "      <td>2013-11-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1769</th>\n",
       "      <td>9</td>\n",
       "      <td>我院青年志愿者协会携三项目参加第三届志愿服务广州交流会获好评</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/7dbd1352dddd475d8d...</td>\n",
       "      <td>2013-11-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1770</th>\n",
       "      <td>10</td>\n",
       "      <td>领略中国声乐的魅力 唱出华夏声乐的韵味——《南方论坛》系列讲座之六十二讲顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/9a54e83536e04239af...</td>\n",
       "      <td>2013-11-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1771</th>\n",
       "      <td>11</td>\n",
       "      <td>三中全会通过深化改革决定 媒体解读公报6看点</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5590057facf44f2d84...</td>\n",
       "      <td>2013-11-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1772</th>\n",
       "      <td>12</td>\n",
       "      <td>十八届三中全会9日开幕 总体部署全面深化改革</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/42570d0508494167a0...</td>\n",
       "      <td>2013-11-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1773</th>\n",
       "      <td>13</td>\n",
       "      <td>我院党委党校2013年秋季要求入党积极分子培训班第三讲、第四讲顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/74902d77e2b24afeab...</td>\n",
       "      <td>2013-11-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1774</th>\n",
       "      <td>14</td>\n",
       "      <td>我院艺创系学子在广东省“和谐杯”手绘技能大赛获佳绩</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/1eca80f5d69240e491...</td>\n",
       "      <td>2013-10-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1775</th>\n",
       "      <td>15</td>\n",
       "      <td>我院多个项目获2013年度省级以上本科教学质量工程项目立项</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/e2ef39bdace94f3da9...</td>\n",
       "      <td>2013-10-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1777</th>\n",
       "      <td>17</td>\n",
       "      <td>我院经管系2013级创新实验国际班开班典礼隆重举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/2af0127ce4234c7aa5...</td>\n",
       "      <td>2013-09-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1776</th>\n",
       "      <td>16</td>\n",
       "      <td>首届从化地区学工部（处）联谊会在我院举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5a530157f3764b32ad...</td>\n",
       "      <td>2013-09-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1778</th>\n",
       "      <td>18</td>\n",
       "      <td>学院教学工作会议顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/b844901be7a6412eb7...</td>\n",
       "      <td>2013-09-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1779</th>\n",
       "      <td>19</td>\n",
       "      <td>我院召开新进教职工座谈会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/460879ee62c94531ba...</td>\n",
       "      <td>2013-09-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1800</th>\n",
       "      <td>0</td>\n",
       "      <td>我院2013级新生军训正式开始</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/9ae5ab09744e4d808a...</td>\n",
       "      <td>2013-09-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1801</th>\n",
       "      <td>1</td>\n",
       "      <td>我院2013级新生“安全法纪教育”讲座顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/e8f0aa3bb74d43cbb0...</td>\n",
       "      <td>2013-09-17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1802 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      index                                              标题  \\\n",
       "0         0                             快！来为我校大学生国旗护卫队参赛点赞！   \n",
       "1         1                              专注当下，冲刺高考，奋斗出最美的青春   \n",
       "2         2                 我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会   \n",
       "3         3                              我校承办首届 “新时代从商培养工程”   \n",
       "4         4                                  广东工业大学华立学院来访我校   \n",
       "5         5  “疫情下的中国、美国以及中美关系”高层论坛暨广州南方学院“美国研究中心”成立五周年纪念研讨会   \n",
       "6         6                     “思想政治理论第一课”上，学校党委书记、校长讲了这些！   \n",
       "7         7              学生报道|传承红色基因，讲好党员故事：我校举办身边优秀党员事迹分享会   \n",
       "9         9                          喜讯：电气学院学子在国际顶尖期刊发表学术论文   \n",
       "8         8                          权威发布：我校名列2021年中国民办高校第四   \n",
       "10       10                我校召开行政人员专题培训暨2021年春季学期第一次办公室工作例会   \n",
       "11       11                             我校开展2021年春季学期教学检查工作   \n",
       "12       12                          媒体报道我校入选国家级一流本科专业建设点情况   \n",
       "13       13                                    我校举行转设更名挂牌仪式   \n",
       "14       14       广东省教育厅公布国家级一流专业建设点数量排名：我校位列广东高校第29，同类院校第1   \n",
       "15       15                           我校政商研究院学生胡志翔在国际期刊发表论文   \n",
       "16       16                           我校举行大一学生升旗仪式暨晨跑之星颁奖大会   \n",
       "17       17                                  我校召开党史学习教育动员大会   \n",
       "18       18                     我校组织参加全省教育系统党史学习教育动员部署会视频会议   \n",
       "20        0            我校师生深入学习习近平总书记在政协医药卫生界教育界联组会上的重要讲话精神   \n",
       "19       19                          喜讯：我校计算机科学与技术专业获IEET认证   \n",
       "21        1                      广东省同类院校唯一：连续两年获批国家级一流专业建设点   \n",
       "22        2                               媒体刊登我校督学顾问黄天骥教授专访   \n",
       "23        3                       我校文传学院学生曾竞在广东学习平台“强国征文”获奖   \n",
       "24        4                                     校领导走访慰问教职员工   \n",
       "25        5                           铺好师生返校安全之路，迈好新学期开学第一步   \n",
       "26        6           护理与健康学院名誉院长方海云主任护师获全国“敬老爱老助老模范人物”荣誉称号   \n",
       "27        7                                 2021年春季学期学生返校指引   \n",
       "28        8                             我校召开2020年度领导干部民主生活会   \n",
       "29        9                              我校2020年终总结表彰大会隆重举行   \n",
       "...     ...                                             ...   \n",
       "1752     12                            杨长学教授畅谈就业观，助我院学子职达成功   \n",
       "1754     14            我院学子“北银消金杯”电子商务与互联网金融大赛获全国二等奖1项三等奖2项   \n",
       "1753     13                 喜报：我院在2012年广东省高校思政理论课建设评估中被评为优秀   \n",
       "1755     15                       广州科技职业技术学院董事胡日章教授一行四人来访我院   \n",
       "1756     16             火热聘场 铺路搭桥——从化市2014届高校毕业生（冬季）专场招聘会举行   \n",
       "1757     17                      第八届广东大中专学生校园文化艺节之舞蹈大赛在我院举行   \n",
       "1758     18              我院喜获2013年新华教育论坛“大国教育之声”中国品牌影响力独立学院   \n",
       "1759     19                我院学子在全国啦啦操联赛暨“中国啦啦之星”争霸赛总决赛中夺得亚军   \n",
       "1760      0                多管齐下，助学子更好就业——我院2014届毕业生就业工作扎实推进   \n",
       "1761      1               英国知山大学副校长Seth Crofts一行4人来我院洽谈合作事宜   \n",
       "1762      2                         夜话生活，漫谈大学——“夜话大学”系列讲座开讲   \n",
       "1764      4             喜讯：我院学子荣获2013年One Show中华青年创意竞赛视频类金奖   \n",
       "1765      5                   喜报：我院学子在广东省第四届高校大学生心理剧大赛中喜获佳绩   \n",
       "1766      6                         中南财经政法大学武汉学院杨洁主任一行来我院调研   \n",
       "1763      3                            我院辅导员赴中山大学随岗学习工作正式启动   \n",
       "1767      7                       第三届“南沙湿地杯”羊城大学生辩论赛初赛在我院举行   \n",
       "1768      8                              大爱无疆，台湾慈济基金会走进中大南方   \n",
       "1769      9                  我院青年志愿者协会携三项目参加第三届志愿服务广州交流会获好评   \n",
       "1770     10        领略中国声乐的魅力 唱出华夏声乐的韵味——《南方论坛》系列讲座之六十二讲顺利举行   \n",
       "1771     11                          三中全会通过深化改革决定 媒体解读公报6看点   \n",
       "1772     12                          十八届三中全会9日开幕 总体部署全面深化改革   \n",
       "1773     13             我院党委党校2013年秋季要求入党积极分子培训班第三讲、第四讲顺利举行   \n",
       "1774     14                       我院艺创系学子在广东省“和谐杯”手绘技能大赛获佳绩   \n",
       "1775     15                   我院多个项目获2013年度省级以上本科教学质量工程项目立项   \n",
       "1777     17                       我院经管系2013级创新实验国际班开班典礼隆重举行   \n",
       "1776     16                            首届从化地区学工部（处）联谊会在我院举行   \n",
       "1778     18                                    学院教学工作会议顺利召开   \n",
       "1779     19                                    我院召开新进教职工座谈会   \n",
       "1800      0                                 我院2013级新生军训正式开始   \n",
       "1801      1                         我院2013级新生“安全法纪教育”讲座顺利举行   \n",
       "\n",
       "                                                     链结          日期  \n",
       "0     https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae...  2021-04-09  \n",
       "1     https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04be...  2021-04-02  \n",
       "2     https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a...  2021-04-02  \n",
       "3     https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d...  2021-04-02  \n",
       "4     https://www.nfu.edu.cn/xxyw/debb2f222e024cbda5...  2021-03-31  \n",
       "5     https://www.nfu.edu.cn/xxyw/e5378134dbaf4b7b88...  2021-03-31  \n",
       "6     https://www.nfu.edu.cn/xxyw/7c865b16b203467ab6...  2021-03-29  \n",
       "7     https://www.nfu.edu.cn/xxyw/28b0ad0eee8149e6b7...  2021-03-29  \n",
       "9     https://www.nfu.edu.cn/xxyw/395b8e2ba5df47c59d...  2021-03-26  \n",
       "8     https://www.nfu.edu.cn/xxyw/c48c33c8f744430eb9...  2021-03-26  \n",
       "10    https://www.nfu.edu.cn/xxyw/59bda093ced440f78c...  2021-03-19  \n",
       "11    https://www.nfu.edu.cn/xxyw/1af5590575b74762b6...  2021-03-15  \n",
       "12    https://www.nfu.edu.cn/xxyw/4e32521de0da4d2197...  2021-03-15  \n",
       "13    https://www.nfu.edu.cn/xxyw/23279088871e4b89b8...  2021-03-12  \n",
       "14    https://www.nfu.edu.cn/xxyw/a5de3999469447b488...  2021-03-12  \n",
       "15    https://www.nfu.edu.cn/xxyw/6273fd9185b54b20a0...  2021-03-12  \n",
       "16    https://www.nfu.edu.cn/xxyw/a1f9ac1d39704e4d81...  2021-03-12  \n",
       "17    https://www.nfu.edu.cn/xxyw/c438a1ec6db5446faf...  2021-03-11  \n",
       "18    https://www.nfu.edu.cn/xxyw/f28729353ff749b9b1...  2021-03-09  \n",
       "20    https://www.nfu.edu.cn/xxyw/d6a77d315f9844618e...  2021-03-08  \n",
       "19    https://www.nfu.edu.cn/xxyw/f25bba2bf25d43399a...  2021-03-08  \n",
       "21    https://www.nfu.edu.cn/xxyw/96c744528cdf40fc83...  2021-03-04  \n",
       "22    https://www.nfu.edu.cn/xxyw/3190c890760d4890b5...  2021-03-01  \n",
       "23    https://www.nfu.edu.cn/xxyw/215f9d4a3fd6445fbf...  2021-03-01  \n",
       "24    https://www.nfu.edu.cn/xxyw/7a20f30cfc484b05a2...  2021-02-26  \n",
       "25    https://www.nfu.edu.cn/xxyw/cb64557433d74cb0b5...  2021-02-26  \n",
       "26    https://www.nfu.edu.cn/xxyw/a7fbc0199b7c43eab0...  2021-02-25  \n",
       "27    https://www.nfu.edu.cn/xxyw/eeb15a5fca744e99b7...  2021-02-20  \n",
       "28    https://www.nfu.edu.cn/xxyw/346772634f7843f6b8...  2021-01-29  \n",
       "29    https://www.nfu.edu.cn/xxyw/f989dee2d6a34995a6...  2021-01-28  \n",
       "...                                                 ...         ...  \n",
       "1752  https://www.nfu.edu.cn/xxyw/909334578e6e46c88a...  2013-12-17  \n",
       "1754  https://www.nfu.edu.cn/xxyw/f6a8f9664a6e42d5a2...  2013-12-16  \n",
       "1753  https://www.nfu.edu.cn/xxyw/5e1ba4ce5088451fb8...  2013-12-16  \n",
       "1755  https://www.nfu.edu.cn/xxyw/7ca1a9b0d7964e9eb9...  2013-12-13  \n",
       "1756  https://www.nfu.edu.cn/xxyw/8c291aa5fcac48eeb0...  2013-12-09  \n",
       "1757  https://www.nfu.edu.cn/xxyw/c26c0af97acf48038b...  2013-12-07  \n",
       "1758  https://www.nfu.edu.cn/xxyw/4f0410882be943b4a5...  2013-12-06  \n",
       "1759  https://www.nfu.edu.cn/xxyw/4933369c21954a8892...  2013-12-05  \n",
       "1760  https://www.nfu.edu.cn/xxyw/fb1c14ff01af4e2588...  2013-12-04  \n",
       "1761  https://www.nfu.edu.cn/xxyw/5f876c948f6d4adabf...  2013-12-03  \n",
       "1762  https://www.nfu.edu.cn/xxyw/82062b268577401690...  2013-11-29  \n",
       "1764  https://www.nfu.edu.cn/xxyw/5e1e30b982e246e2bc...  2013-11-28  \n",
       "1765  https://www.nfu.edu.cn/xxyw/3719dbce00d7452d9f...  2013-11-28  \n",
       "1766  https://www.nfu.edu.cn/xxyw/4e827ce7d96549748b...  2013-11-28  \n",
       "1763  https://www.nfu.edu.cn/xxyw/23f8d4442fc94e77a9...  2013-11-28  \n",
       "1767  https://www.nfu.edu.cn/xxyw/ce13d164907d4803a8...  2013-11-27  \n",
       "1768  https://www.nfu.edu.cn/xxyw/6d06c7717bfd4a2ea0...  2013-11-27  \n",
       "1769  https://www.nfu.edu.cn/xxyw/7dbd1352dddd475d8d...  2013-11-20  \n",
       "1770  https://www.nfu.edu.cn/xxyw/9a54e83536e04239af...  2013-11-15  \n",
       "1771  https://www.nfu.edu.cn/xxyw/5590057facf44f2d84...  2013-11-13  \n",
       "1772  https://www.nfu.edu.cn/xxyw/42570d0508494167a0...  2013-11-11  \n",
       "1773  https://www.nfu.edu.cn/xxyw/74902d77e2b24afeab...  2013-11-01  \n",
       "1774  https://www.nfu.edu.cn/xxyw/1eca80f5d69240e491...  2013-10-14  \n",
       "1775  https://www.nfu.edu.cn/xxyw/e2ef39bdace94f3da9...  2013-10-11  \n",
       "1777  https://www.nfu.edu.cn/xxyw/2af0127ce4234c7aa5...  2013-09-29  \n",
       "1776  https://www.nfu.edu.cn/xxyw/5a530157f3764b32ad...  2013-09-29  \n",
       "1778  https://www.nfu.edu.cn/xxyw/b844901be7a6412eb7...  2013-09-26  \n",
       "1779  https://www.nfu.edu.cn/xxyw/460879ee62c94531ba...  2013-09-25  \n",
       "1800  https://www.nfu.edu.cn/xxyw/9ae5ab09744e4d808a...  2013-09-17  \n",
       "1801  https://www.nfu.edu.cn/xxyw/e8f0aa3bb74d43cbb0...  2013-09-17  \n",
       "\n",
       "[1802 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "list_df = []\n",
    "\n",
    "\n",
    "files= os.listdir('html_out/xxyw/')\n",
    "print(files)\n",
    "\n",
    "for html in files:\n",
    "    with open('html_out/xxyw/'+html,encoding='utf8',mode='r') as fp:\n",
    "        html_load = fp.read()\n",
    "        parsed = requests_html.soup_parse(html_load)\n",
    "        list_URL = pages_content_url(parsed)\n",
    "        \n",
    "        df = pd.DataFrame( {\n",
    "         \"标题\": parsed.xpath(dict_xpath['标题_xpath']),\n",
    "         \"链结\": list_URL,\n",
    "         \"日期\": parsed.xpath(dict_xpath['日期_xpath']),\n",
    "        } )\n",
    "        list_df.append(df)\n",
    "\n",
    "        \n",
    "        \n",
    "df_all = pd.concat(list_df).reset_index().sort_values(by='日期',ascending=False)\n",
    "display(df_all)    \n",
    "\n",
    "df_all\n",
    "\n",
    "df_all.to_excel(\"data_out/nfu_xxyw_文学与传媒学院.xlsx\", sheet_name=\"学校要闻\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ！！校园动态"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A1  nfu.edu.cn \n",
    "session = HTMLSession()\n",
    "r = session.get(\"https://www.nfu.edu.cn/xydt/index.htm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Element html at 0x285fb98a778>"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析\n",
    "parsed = requests_html.soup_parse(html_load)\n",
    "parsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='https', netloc='www.nfu.edu.cn', path='/xydt/index.htm', params='', query='', fragment='')"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析\n",
    "base_url = r.url\n",
    "nfu_urlparse = urllib.parse.urlparse(base_url)\n",
    "nfu_urlparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "85\n"
     ]
    }
   ],
   "source": [
    "for i in range(1,100):\n",
    "    r = session.get('https://www.nfu.edu.cn/xydt/index'+str(i)+'.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break\n",
    "# so page = 19?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xydt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index84.htm']"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Numbered listing pages of the xydt section: index1.htm through index84.htm.\n",
    "url_group_xydt = [f'https://www.nfu.edu.cn/xydt/index{page_no}.htm' for page_no in range(1, 85)]\n",
    "url_group_xydt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first listing page is index.htm (no number), so it is prepended separately.\n",
    "# Guarded so that re-running this cell does not insert a duplicate entry,\n",
    "# which would make the same page be downloaded and parsed twice.\n",
    "if 'https://www.nfu.edu.cn/xydt/index.htm' not in url_group_xydt:\n",
    "    url_group_xydt.insert(0, 'https://www.nfu.edu.cn/xydt/index.htm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xydt/index.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index84.htm']"
      ]
     },
     "execution_count": 177,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the list to confirm that index.htm now comes first.\n",
    "url_group_xydt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download every listing page and cache its HTML under html_out/, mirroring the\n",
    "# URL path (e.g. /xydt/index1.htm -> html_out/xydt/index1.htm).\n",
    "import os\n",
    "\n",
    "for url in url_group_xydt:\n",
    "    r = session.get(url)\n",
    "    path = urllib.parse.urlparse(url).path\n",
    "    # path starts with '/', so strip it before joining; plain concatenation\n",
    "    # produced 'html_out//xydt/...'.\n",
    "    out_file = os.path.join('html_out', path.lstrip('/'))\n",
    "    # open(..., mode='w') does not create folders, so make sure the target exists;\n",
    "    # otherwise a fresh checkout fails with FileNotFoundError.\n",
    "    os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
    "    with open(out_file, encoding='utf8', mode='w') as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/xydt/index.htm'"
      ]
     },
     "execution_count": 186,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the path component of the first listing URL, as returned by urlparse.\n",
    "urllib.parse.urlparse(url_group_xydt[0]).path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [],
   "source": [
    "# xpath 准备：\n",
    "dict_xpath = {\n",
    "    '链接_xpath':'//div[@class=\"news_title\"]/a/@href',\n",
    "    '标题_xpath':'//div[@class=\"news_title\"]/a/@title',\n",
    "    '日期_xpath':'//font[@class=\"right-more\"]/text()'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pages_content_url(parsed):\n",
    "    \"\"\"Build absolute detail-page URLs from the links found on a listing page.\n",
    "\n",
    "    parsed: object exposing .xpath(); it is queried with dict_xpath['链接_xpath'].\n",
    "    Returns a list of URLs made from the scheme and netloc of the global\n",
    "    nfu_urlparse plus '/<first path segment of nfu_urlparse>/<extracted href>'.\n",
    "    \"\"\"\n",
    "    list_URL = []\n",
    "    for detail_url in parsed.xpath(dict_xpath['链接_xpath']):\n",
    "        # Section folder (first path segment) of the reference URL + the scraped href.\n",
    "        full_path = '/' + nfu_urlparse.path.split('/')[1] + '/' + detail_url\n",
    "        components = [nfu_urlparse.scheme, nfu_urlparse.netloc, full_path, '', '', '']\n",
    "        list_URL.append(urllib.parse.urlunparse(components))\n",
    "    return list_URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['index.htm', 'index1.htm', 'index10.htm', 'index11.htm', 'index12.htm', 'index13.htm', 'index14.htm', 'index15.htm', 'index16.htm', 'index17.htm', 'index18.htm', 'index19.htm', 'index2.htm', 'index20.htm', 'index21.htm', 'index22.htm', 'index23.htm', 'index24.htm', 'index25.htm', 'index26.htm', 'index27.htm', 'index28.htm', 'index29.htm', 'index3.htm', 'index30.htm', 'index31.htm', 'index32.htm', 'index33.htm', 'index34.htm', 'index35.htm', 'index36.htm', 'index37.htm', 'index38.htm', 'index39.htm', 'index4.htm', 'index40.htm', 'index41.htm', 'index42.htm', 'index43.htm', 'index44.htm', 'index45.htm', 'index46.htm', 'index47.htm', 'index48.htm', 'index49.htm', 'index5.htm', 'index50.htm', 'index51.htm', 'index52.htm', 'index53.htm', 'index54.htm', 'index55.htm', 'index56.htm', 'index57.htm', 'index58.htm', 'index59.htm', 'index6.htm', 'index60.htm', 'index61.htm', 'index62.htm', 'index63.htm', 'index64.htm', 'index65.htm', 'index66.htm', 'index67.htm', 'index68.htm', 'index69.htm', 'index7.htm', 'index70.htm', 'index71.htm', 'index72.htm', 'index73.htm', 'index74.htm', 'index75.htm', 'index76.htm', 'index77.htm', 'index78.htm', 'index79.htm', 'index8.htm', 'index80.htm', 'index81.htm', 'index82.htm', 'index83.htm', 'index84.htm', 'index9.htm']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>【国奖映像】苏绮筠：让优秀成为习惯</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/7dfe6fcd15fd495597...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/9ec16bf90e164071b6...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>【国奖映像】陈宇：心怀热爱，奔赴梦想</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/09627d3243ee4578ac...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/debea203b0c84a3092...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>商学院电子商务专业召开申请调整学位授予学科门类 专家评审会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/cf4420785b9046e998...</td>\n",
       "      <td>2021-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>会计学院大一年级大会顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/16f4c5f4bd284caebf...</td>\n",
       "      <td>2021-04-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>广州新华学院会计学院刘运国院长一行莅临我院访问</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/935f580040704990a4...</td>\n",
       "      <td>2021-04-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>【国奖映像】蒋晓琳：明确目标，为之努力</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/9611d110ec8a486587...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>励能计划2021：你选哪一项？</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/8b2414ee7cca45d88c...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>党团同行传薪火，红色循迹筑初心——商学院党团同行重走“东江纵队”红色之路</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/a9523b72a34e4143af...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>广州南方学院老年与慢病护理研究中心学术沙龙系列第3期圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/f8434c9f092348c2a4...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>护理与健康学院“教学相长，从教学中成长”暨青年教师专题讲座与交流活动圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/bdda4dfbda944a3eb8...</td>\n",
       "      <td>2021-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>青马工程｜“回首峥嵘岁月，领悟红船精神”——商学院百年党史宣讲活动</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/4b0c8e69b5074d28ba...</td>\n",
       "      <td>2021-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>凝心聚力 共谱新篇——商学院召开新学期全体教职工大会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/d60d33983337463390...</td>\n",
       "      <td>2021-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>党建观摩拓思路，对照标杆“取真经”——我校商学院师生团队赴广东外语外贸大学南国商学院管理学院...</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/96e05388e3fa43de9a...</td>\n",
       "      <td>2021-03-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>护理与健康学院直属党支部2021年春季入党积极分子培训圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/25dc7cb574284be18a...</td>\n",
       "      <td>2021-03-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>成长在文传我院举行2021年春季学期全体教职工大会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/b9777111c2194e7b85...</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>“远离糖尿病，筑起健康防线” 护理与健康学院寒假社会实践调查成果汇报展示圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/2f3dcc0f4400419e8e...</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>倾心指导促研学，凝心聚力谋发展——大英中心教学研究座谈会顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/c3846031b4c0444a99...</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>护理与健康学院第37期师生面对面顺利开展</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/5e165fdaee83489989...</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0</td>\n",
       "      <td>凝聚共识谋规划， 师生同心促发展—— 电气与计算机工程学院召开全体教职工大会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/76d9a219517e4a2b8c...</td>\n",
       "      <td>2021-03-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "      <td>护理与健康学院“开卷行之”征文比赛圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/bf6e98d6027544809e...</td>\n",
       "      <td>2021-03-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2</td>\n",
       "      <td>青春心向党，奋进新时代——护理与健康学院院长“思政第一课”顺利开讲</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/874bf131af7746d888...</td>\n",
       "      <td>2021-03-23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4</td>\n",
       "      <td>广州南方学院教育实践基地签约挂牌仪式暨2021年良口中学第一期课程开班仪式</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/7f86c7803e7c440883...</td>\n",
       "      <td>2021-03-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>3</td>\n",
       "      <td>护理与健康学院召开2021年春季工作会议</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/30f39cccbda84ecb92...</td>\n",
       "      <td>2021-03-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5</td>\n",
       "      <td>会计学一流专业建设研讨会暨“注协班”签约仪式在我校顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/635fef967364474789...</td>\n",
       "      <td>2021-03-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>6</td>\n",
       "      <td>护理与健康学院直属党支部举办“传承红色基因，讲好党员故事”故事汇报会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/2de35ebbd3574d8f9f...</td>\n",
       "      <td>2021-03-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>7</td>\n",
       "      <td>坚定信念，逐梦考研 ——会计学院2019级考研动员会顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/1ca0eaebd5754d809a...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>8</td>\n",
       "      <td>会计学院召开2021年学生出国留学工作推进会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/4fa9318bd56f4feea8...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>9</td>\n",
       "      <td>会计学院辅导员走访宿舍，真情关怀暖人心</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/8c88bbfcfb0f47c7b1...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1634</th>\n",
       "      <td>14</td>\n",
       "      <td>身体力行，从我做起——会计学系雷锋月校园清洁志愿活动顺利展开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/129b868ea670422bbf...</td>\n",
       "      <td>2016-03-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1635</th>\n",
       "      <td>15</td>\n",
       "      <td>经济学与商务管理系校外雷锋月摆摊活动圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/ee227cdddb6549d7b6...</td>\n",
       "      <td>2016-03-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1636</th>\n",
       "      <td>16</td>\n",
       "      <td>我院举行2016年春季安全教育讲座</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/b72a0e63860f4cf681...</td>\n",
       "      <td>2016-03-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1637</th>\n",
       "      <td>17</td>\n",
       "      <td>回首过往，展望未来——会计学系2014级会计专业新学期班委会顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/209b8e39f508454a86...</td>\n",
       "      <td>2016-03-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1639</th>\n",
       "      <td>19</td>\n",
       "      <td>公共关系协会3?14情人节晚会浪漫落幕</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/cb1f1c6c8b824282ad...</td>\n",
       "      <td>2016-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1638</th>\n",
       "      <td>18</td>\n",
       "      <td>我院2016年辅导员职业能力竞赛圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/056a88be1f054f3289...</td>\n",
       "      <td>2016-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1640</th>\n",
       "      <td>0</td>\n",
       "      <td>我院工会教职工书画协会成立大会暨第一届理事会第一次会议召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/076e3d04308647da81...</td>\n",
       "      <td>2016-03-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1641</th>\n",
       "      <td>1</td>\n",
       "      <td>妙语连珠 辩出精彩——会计学系第七届班际辩论赛开幕式顺利举办</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/a3d003ee0cb845c1af...</td>\n",
       "      <td>2016-03-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1643</th>\n",
       "      <td>3</td>\n",
       "      <td>文传系与广东广播电视台移动频道合作搭建协同育人平台</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/8f195f6a59db4bb4bd...</td>\n",
       "      <td>2016-03-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1642</th>\n",
       "      <td>2</td>\n",
       "      <td>心理学社年会落幕，2016年重新启航</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/077570f22ca4477cae...</td>\n",
       "      <td>2016-03-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1644</th>\n",
       "      <td>4</td>\n",
       "      <td>书香寒假，你我共读——会计学系顺利开展“悦读阅会乐”寒假读书活动</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/fb57039d5437421cb9...</td>\n",
       "      <td>2016-03-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1645</th>\n",
       "      <td>5</td>\n",
       "      <td>怀揣理想，再度起航——2015-2016学年第二学期会计学系团学动员大会顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/2db3a56623ae40c5b4...</td>\n",
       "      <td>2016-03-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1647</th>\n",
       "      <td>7</td>\n",
       "      <td>戮力同心谋发展，砥砺前行谱新篇——国际学院召开首次工作会议</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/b93b1cac18e846baac...</td>\n",
       "      <td>2016-03-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1646</th>\n",
       "      <td>6</td>\n",
       "      <td>会计学系顺利召开发展战略研讨会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/0181005d75c745ae86...</td>\n",
       "      <td>2016-03-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1648</th>\n",
       "      <td>8</td>\n",
       "      <td>传承家风家规，弘扬传统美德——记政商研究院“家风”主题演讲比赛</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/e36771c2b86d4d498a...</td>\n",
       "      <td>2016-03-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1649</th>\n",
       "      <td>9</td>\n",
       "      <td>新学期 新开始——记公共管理学系开学升旗仪式</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/da5c7e4c2ac441e68b...</td>\n",
       "      <td>2016-03-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1652</th>\n",
       "      <td>12</td>\n",
       "      <td>走进政商宿舍，走近政商学子——记新学期师生宿舍走访</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/29cb4400d9e44acfbe...</td>\n",
       "      <td>2016-03-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1651</th>\n",
       "      <td>11</td>\n",
       "      <td>勇攀高峰，展望未来——外文系新学期班长会议顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/5912160e075c4aa9a0...</td>\n",
       "      <td>2016-03-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1650</th>\n",
       "      <td>10</td>\n",
       "      <td>国际学院举办第三届中美学生跨文化交流活动</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/01bc304799e44dc0ab...</td>\n",
       "      <td>2016-03-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1653</th>\n",
       "      <td>13</td>\n",
       "      <td>我院会计学系逢甲大学研修专班开训典礼顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/fe516b71e79544bb80...</td>\n",
       "      <td>2016-02-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1654</th>\n",
       "      <td>14</td>\n",
       "      <td>寒潮不减热情 相聚更显情深——电子通信与软件工程系顺利举办校友交流会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/30996b733cb74e0e81...</td>\n",
       "      <td>2016-01-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1655</th>\n",
       "      <td>15</td>\n",
       "      <td>产教结合，共享双赢——电软系赴企业考察交流</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/fbe740cdd4534c20af...</td>\n",
       "      <td>2016-01-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1656</th>\n",
       "      <td>16</td>\n",
       "      <td>电软系召开工程教育专业认证课程改革项目中期检查会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/7367ece3043744caac...</td>\n",
       "      <td>2016-01-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1657</th>\n",
       "      <td>17</td>\n",
       "      <td>海峡两岸同根源，共为南院谋发展——团委与台湾籍教师第一次交流茶话会成功举办</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/ff3a705f1c104117a4...</td>\n",
       "      <td>2016-01-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1659</th>\n",
       "      <td>19</td>\n",
       "      <td>广东技术师范学院大学英语部与我院大学英语教学中心教师交流会顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/5cc461d4a37a4afb8d...</td>\n",
       "      <td>2016-01-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1658</th>\n",
       "      <td>18</td>\n",
       "      <td>文学与传媒系2016年学术研讨会议成功举办</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/ae2cf68f6b1247a6a1...</td>\n",
       "      <td>2016-01-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1661</th>\n",
       "      <td>1</td>\n",
       "      <td>严谨为学，诚信迎考--工商管理系班级期末总结暨诚信考试动员大会圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/3d02b255690e4ce796...</td>\n",
       "      <td>2016-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1660</th>\n",
       "      <td>0</td>\n",
       "      <td>经济学与商务管理系顺利召开办公培训会议</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/9b59863e5051412d80...</td>\n",
       "      <td>2016-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1662</th>\n",
       "      <td>2</td>\n",
       "      <td>以学生为本，做优秀学生干部——我院“青马工程”第二讲举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/0132532dbb0e448d82...</td>\n",
       "      <td>2015-10-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1663</th>\n",
       "      <td>3</td>\n",
       "      <td>经济学与商务管理系党总支第17期入党积极分子实践活动圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/2d20a911787b4cc09c...</td>\n",
       "      <td>2015-03-28</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1684 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      index                                                 标题  \\\n",
       "0         0                                  【国奖映像】苏绮筠：让优秀成为习惯   \n",
       "3         3                护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行   \n",
       "1         1                                 【国奖映像】陈宇：心怀热爱，奔赴梦想   \n",
       "2         2                  护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束   \n",
       "4         4                      商学院电子商务专业召开申请调整学位授予学科门类 专家评审会   \n",
       "5         5                                     会计学院大一年级大会顺利召开   \n",
       "6         6                            广州新华学院会计学院刘运国院长一行莅临我院访问   \n",
       "7         7                                【国奖映像】蒋晓琳：明确目标，为之努力   \n",
       "8         8                                    励能计划2021：你选哪一项？   \n",
       "9         9               党团同行传薪火，红色循迹筑初心——商学院党团同行重走“东江纵队”红色之路   \n",
       "10       10                     广州南方学院老年与慢病护理研究中心学术沙龙系列第3期圆满结束   \n",
       "13       13             护理与健康学院“教学相长，从教学中成长”暨青年教师专题讲座与交流活动圆满结束   \n",
       "12       12                  青马工程｜“回首峥嵘岁月，领悟红船精神”——商学院百年党史宣讲活动   \n",
       "11       11                         凝心聚力 共谱新篇——商学院召开新学期全体教职工大会   \n",
       "14       14  党建观摩拓思路，对照标杆“取真经”——我校商学院师生团队赴广东外语外贸大学南国商学院管理学院...   \n",
       "15       15                    护理与健康学院直属党支部2021年春季入党积极分子培训圆满结束   \n",
       "16       16                          成长在文传我院举行2021年春季学期全体教职工大会   \n",
       "17       17           “远离糖尿病，筑起健康防线” 护理与健康学院寒假社会实践调查成果汇报展示圆满结束   \n",
       "18       18                   倾心指导促研学，凝心聚力谋发展——大英中心教学研究座谈会顺利召开   \n",
       "19       19                               护理与健康学院第37期师生面对面顺利开展   \n",
       "20        0             凝聚共识谋规划， 师生同心促发展—— 电气与计算机工程学院召开全体教职工大会   \n",
       "21        1                              护理与健康学院“开卷行之”征文比赛圆满结束   \n",
       "22        2                  青春心向党，奋进新时代——护理与健康学院院长“思政第一课”顺利开讲   \n",
       "24        4              广州南方学院教育实践基地签约挂牌仪式暨2021年良口中学第一期课程开班仪式   \n",
       "23        3                               护理与健康学院召开2021年春季工作会议   \n",
       "25        5                      会计学一流专业建设研讨会暨“注协班”签约仪式在我校顺利举行   \n",
       "26        6                 护理与健康学院直属党支部举办“传承红色基因，讲好党员故事”故事汇报会   \n",
       "27        7                     坚定信念，逐梦考研 ——会计学院2019级考研动员会顺利举行   \n",
       "28        8                             会计学院召开2021年学生出国留学工作推进会   \n",
       "29        9                                会计学院辅导员走访宿舍，真情关怀暖人心   \n",
       "...     ...                                                ...   \n",
       "1634     14                     身体力行，从我做起——会计学系雷锋月校园清洁志愿活动顺利展开   \n",
       "1635     15                             经济学与商务管理系校外雷锋月摆摊活动圆满结束   \n",
       "1636     16                                  我院举行2016年春季安全教育讲座   \n",
       "1637     17                 回首过往，展望未来——会计学系2014级会计专业新学期班委会顺利召开   \n",
       "1639     19                                公共关系协会3?14情人节晚会浪漫落幕   \n",
       "1638     18                               我院2016年辅导员职业能力竞赛圆满结束   \n",
       "1640      0                      我院工会教职工书画协会成立大会暨第一届理事会第一次会议召开   \n",
       "1641      1                     妙语连珠 辩出精彩——会计学系第七届班际辩论赛开幕式顺利举办   \n",
       "1643      3                          文传系与广东广播电视台移动频道合作搭建协同育人平台   \n",
       "1642      2                                 心理学社年会落幕，2016年重新启航   \n",
       "1644      4                   书香寒假，你我共读——会计学系顺利开展“悦读阅会乐”寒假读书活动   \n",
       "1645      5           怀揣理想，再度起航——2015-2016学年第二学期会计学系团学动员大会顺利召开   \n",
       "1647      7                      戮力同心谋发展，砥砺前行谱新篇——国际学院召开首次工作会议   \n",
       "1646      6                                    会计学系顺利召开发展战略研讨会   \n",
       "1648      8                    传承家风家规，弘扬传统美德——记政商研究院“家风”主题演讲比赛   \n",
       "1649      9                             新学期 新开始——记公共管理学系开学升旗仪式   \n",
       "1652     12                          走进政商宿舍，走近政商学子——记新学期师生宿舍走访   \n",
       "1651     11                          勇攀高峰，展望未来——外文系新学期班长会议顺利召开   \n",
       "1650     10                               国际学院举办第三届中美学生跨文化交流活动   \n",
       "1653     13                             我院会计学系逢甲大学研修专班开训典礼顺利举行   \n",
       "1654     14                 寒潮不减热情 相聚更显情深——电子通信与软件工程系顺利举办校友交流会   \n",
       "1655     15                              产教结合，共享双赢——电软系赴企业考察交流   \n",
       "1656     16                           电软系召开工程教育专业认证课程改革项目中期检查会   \n",
       "1657     17              海峡两岸同根源，共为南院谋发展——团委与台湾籍教师第一次交流茶话会成功举办   \n",
       "1659     19                  广东技术师范学院大学英语部与我院大学英语教学中心教师交流会顺利举行   \n",
       "1658     18                              文学与传媒系2016年学术研讨会议成功举办   \n",
       "1661      1                严谨为学，诚信迎考--工商管理系班级期末总结暨诚信考试动员大会圆满结束   \n",
       "1660      0                                经济学与商务管理系顺利召开办公培训会议   \n",
       "1662      2                       以学生为本，做优秀学生干部——我院“青马工程”第二讲举行   \n",
       "1663      3                     经济学与商务管理系党总支第17期入党积极分子实践活动圆满结束   \n",
       "\n",
       "                                                     链结          日期  \n",
       "0     https://www.nfu.edu.cn/xydt/7dfe6fcd15fd495597...  2021-04-09  \n",
       "3     https://www.nfu.edu.cn/xydt/9ec16bf90e164071b6...  2021-04-09  \n",
       "1     https://www.nfu.edu.cn/xydt/09627d3243ee4578ac...  2021-04-09  \n",
       "2     https://www.nfu.edu.cn/xydt/debea203b0c84a3092...  2021-04-09  \n",
       "4     https://www.nfu.edu.cn/xydt/cf4420785b9046e998...  2021-04-07  \n",
       "5     https://www.nfu.edu.cn/xydt/16f4c5f4bd284caebf...  2021-04-06  \n",
       "6     https://www.nfu.edu.cn/xydt/935f580040704990a4...  2021-04-06  \n",
       "7     https://www.nfu.edu.cn/xydt/9611d110ec8a486587...  2021-04-02  \n",
       "8     https://www.nfu.edu.cn/xydt/8b2414ee7cca45d88c...  2021-04-02  \n",
       "9     https://www.nfu.edu.cn/xydt/a9523b72a34e4143af...  2021-04-02  \n",
       "10    https://www.nfu.edu.cn/xydt/f8434c9f092348c2a4...  2021-04-02  \n",
       "13    https://www.nfu.edu.cn/xydt/bdda4dfbda944a3eb8...  2021-04-01  \n",
       "12    https://www.nfu.edu.cn/xydt/4b0c8e69b5074d28ba...  2021-04-01  \n",
       "11    https://www.nfu.edu.cn/xydt/d60d33983337463390...  2021-04-01  \n",
       "14    https://www.nfu.edu.cn/xydt/96e05388e3fa43de9a...  2021-03-30  \n",
       "15    https://www.nfu.edu.cn/xydt/25dc7cb574284be18a...  2021-03-30  \n",
       "16    https://www.nfu.edu.cn/xydt/b9777111c2194e7b85...  2021-03-29  \n",
       "17    https://www.nfu.edu.cn/xydt/2f3dcc0f4400419e8e...  2021-03-29  \n",
       "18    https://www.nfu.edu.cn/xydt/c3846031b4c0444a99...  2021-03-26  \n",
       "19    https://www.nfu.edu.cn/xydt/5e165fdaee83489989...  2021-03-26  \n",
       "20    https://www.nfu.edu.cn/xydt/76d9a219517e4a2b8c...  2021-03-25  \n",
       "21    https://www.nfu.edu.cn/xydt/bf6e98d6027544809e...  2021-03-24  \n",
       "22    https://www.nfu.edu.cn/xydt/874bf131af7746d888...  2021-03-23  \n",
       "24    https://www.nfu.edu.cn/xydt/7f86c7803e7c440883...  2021-03-22  \n",
       "23    https://www.nfu.edu.cn/xydt/30f39cccbda84ecb92...  2021-03-22  \n",
       "25    https://www.nfu.edu.cn/xydt/635fef967364474789...  2021-03-18  \n",
       "26    https://www.nfu.edu.cn/xydt/2de35ebbd3574d8f9f...  2021-03-17  \n",
       "27    https://www.nfu.edu.cn/xydt/1ca0eaebd5754d809a...  2021-03-15  \n",
       "28    https://www.nfu.edu.cn/xydt/4fa9318bd56f4feea8...  2021-03-15  \n",
       "29    https://www.nfu.edu.cn/xydt/8c88bbfcfb0f47c7b1...  2021-03-15  \n",
       "...                                                 ...         ...  \n",
       "1634  https://www.nfu.edu.cn/xydt/129b868ea670422bbf...  2016-03-22  \n",
       "1635  https://www.nfu.edu.cn/xydt/ee227cdddb6549d7b6...  2016-03-22  \n",
       "1636  https://www.nfu.edu.cn/xydt/b72a0e63860f4cf681...  2016-03-19  \n",
       "1637  https://www.nfu.edu.cn/xydt/209b8e39f508454a86...  2016-03-16  \n",
       "1639  https://www.nfu.edu.cn/xydt/cb1f1c6c8b824282ad...  2016-03-15  \n",
       "1638  https://www.nfu.edu.cn/xydt/056a88be1f054f3289...  2016-03-15  \n",
       "1640  https://www.nfu.edu.cn/xydt/076e3d04308647da81...  2016-03-14  \n",
       "1641  https://www.nfu.edu.cn/xydt/a3d003ee0cb845c1af...  2016-03-12  \n",
       "1643  https://www.nfu.edu.cn/xydt/8f195f6a59db4bb4bd...  2016-03-11  \n",
       "1642  https://www.nfu.edu.cn/xydt/077570f22ca4477cae...  2016-03-11  \n",
       "1644  https://www.nfu.edu.cn/xydt/fb57039d5437421cb9...  2016-03-10  \n",
       "1645  https://www.nfu.edu.cn/xydt/2db3a56623ae40c5b4...  2016-03-08  \n",
       "1647  https://www.nfu.edu.cn/xydt/b93b1cac18e846baac...  2016-03-07  \n",
       "1646  https://www.nfu.edu.cn/xydt/0181005d75c745ae86...  2016-03-07  \n",
       "1648  https://www.nfu.edu.cn/xydt/e36771c2b86d4d498a...  2016-03-03  \n",
       "1649  https://www.nfu.edu.cn/xydt/da5c7e4c2ac441e68b...  2016-03-02  \n",
       "1652  https://www.nfu.edu.cn/xydt/29cb4400d9e44acfbe...  2016-03-01  \n",
       "1651  https://www.nfu.edu.cn/xydt/5912160e075c4aa9a0...  2016-03-01  \n",
       "1650  https://www.nfu.edu.cn/xydt/01bc304799e44dc0ab...  2016-03-01  \n",
       "1653  https://www.nfu.edu.cn/xydt/fe516b71e79544bb80...  2016-02-29  \n",
       "1654  https://www.nfu.edu.cn/xydt/30996b733cb74e0e81...  2016-01-28  \n",
       "1655  https://www.nfu.edu.cn/xydt/fbe740cdd4534c20af...  2016-01-25  \n",
       "1656  https://www.nfu.edu.cn/xydt/7367ece3043744caac...  2016-01-18  \n",
       "1657  https://www.nfu.edu.cn/xydt/ff3a705f1c104117a4...  2016-01-13  \n",
       "1659  https://www.nfu.edu.cn/xydt/5cc461d4a37a4afb8d...  2016-01-08  \n",
       "1658  https://www.nfu.edu.cn/xydt/ae2cf68f6b1247a6a1...  2016-01-08  \n",
       "1661  https://www.nfu.edu.cn/xydt/3d02b255690e4ce796...  2016-01-04  \n",
       "1660  https://www.nfu.edu.cn/xydt/9b59863e5051412d80...  2016-01-04  \n",
       "1662  https://www.nfu.edu.cn/xydt/0132532dbb0e448d82...  2015-10-30  \n",
       "1663  https://www.nfu.edu.cn/xydt/2d20a911787b4cc09c...  2015-03-28  \n",
       "\n",
       "[1684 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Rebuild the campus-news (xydt) table from the listing pages saved under html_out/xydt/.\n",
    "# Uses names defined in earlier cells: requests_html, pd, dict_xpath, pages_content_url.\n",
    "list_df = []\n",
    "\n",
    "# os.listdir returns names in arbitrary order and may contain non-page entries, so keep only\n",
    "# .htm/.html files and order them by (length, name). Assuming the index<N>.htm naming used\n",
    "# when the pages were saved, this yields index.htm, index1.htm, ..., index9.htm, index10.htm, ...\n",
    "files = sorted(\n",
    "    (name for name in os.listdir('html_out/xydt/') if name.endswith(('.htm', '.html'))),\n",
    "    key=lambda name: (len(name), name),\n",
    ")\n",
    "print(files)\n",
    "\n",
    "for html in files:\n",
    "    # Only the read needs the file handle; parsing happens after it is closed.\n",
    "    with open('html_out/xydt/'+html,encoding='utf8',mode='r') as fp:\n",
    "        html_load = fp.read()\n",
    "    parsed = requests_html.soup_parse(html_load)\n",
    "    list_URL = pages_content_url(parsed)\n",
    "\n",
    "    # One frame per listing page: title, absolute link, publication date.\n",
    "    df = pd.DataFrame( {\n",
    "     \"标题\": parsed.xpath(dict_xpath['标题_xpath']),\n",
    "     \"链结\": list_URL,\n",
    "     \"日期\": parsed.xpath(dict_xpath['日期_xpath']),\n",
    "    } )\n",
    "    list_df.append(df)\n",
    "\n",
    "# reset_index() keeps each row's position within its page as the 'index' column.\n",
    "# mergesort is stable, so rows sharing a date keep their page order instead of being shuffled.\n",
    "df_all = pd.concat(list_df).reset_index().sort_values(by='日期',ascending=False,kind='mergesort')\n",
    "display(df_all)\n",
    "\n",
    "df_all.to_excel(\"data_out/nfu_xydt_文学与传媒学院.xlsx\", sheet_name=\"校园动态\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ！！通知公告"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A1  nfu.edu.cn: open a fresh session and request the first listing page of the tzgg section\n",
    "session = HTMLSession()\n",
    "r = session.get(\"https://www.nfu.edu.cn/tzgg/index.htm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Element html at 0x285f8fc7a98>"
      ]
     },
     "execution_count": 222,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse the tzgg listing page that was just fetched.\n",
    "# The response body is parsed directly: `html_load` still holds the last xydt page read from\n",
    "# disk by an earlier cell, so parsing it here would inspect the wrong section.\n",
    "parsed = requests_html.soup_parse(r.html.html)\n",
    "parsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='https', netloc='www.nfu.edu.cn', path='/tzgg/index.htm', params='', query='', fragment='')"
      ]
     },
     "execution_count": 223,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split the URL of the fetched page into its components.\n",
    "# pages_content_url reads nfu_urlparse (scheme, netloc, path) when it builds article links.\n",
    "base_url = r.url\n",
    "nfu_urlparse = urllib.parse.urlparse(base_url)\n",
    "nfu_urlparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35\n"
     ]
    }
   ],
   "source": [
    "# Probe index1.htm, index2.htm, ... and print the first page number that does not answer 200.\n",
    "# The probe response gets its own name so it does not overwrite `r` (the section's index page).\n",
    "for i in range(1,100):\n",
    "    probe = session.get('https://www.nfu.edu.cn/tzgg/index'+str(i)+'.htm')\n",
    "    if probe.status_code != 200:\n",
    "        print(i)\n",
    "        break\n",
    "else:\n",
    "    # Loop ran to completion: every probed page answered 200.\n",
    "    print('no missing page found below index100.htm')\n",
    "# The run recorded in this notebook printed 35, i.e. index1.htm to index34.htm exist."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/tzgg/index1.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index2.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index3.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index4.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index5.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index6.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index7.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index8.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index9.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index10.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index11.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index12.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index13.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index14.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index15.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index16.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index17.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index18.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index19.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index20.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index21.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index22.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index23.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index24.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index25.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index26.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index27.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index28.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index29.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index30.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index31.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index32.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index33.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index34.htm']"
      ]
     },
     "execution_count": 225,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group_tzgg = ['https://www.nfu.edu.cn/tzgg/index'+str(i)+'.htm' for i in range(1,35)]\n",
    "url_group_tzgg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the un-numbered first page in front of the numbered listing pages.\n",
    "# Guarded so that re-running this cell does not insert the same URL a second time.\n",
    "if 'https://www.nfu.edu.cn/tzgg/index.htm' not in url_group_tzgg:\n",
    "    url_group_tzgg.insert(0,'https://www.nfu.edu.cn/tzgg/index.htm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/tzgg/index.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index1.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index2.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index3.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index4.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index5.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index6.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index7.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index8.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index9.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index10.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index11.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index12.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index13.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index14.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index15.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index16.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index17.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index18.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index19.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index20.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index21.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index22.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index23.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index24.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index25.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index26.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index27.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index28.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index29.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index30.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index31.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index32.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index33.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index34.htm']"
      ]
     },
     "execution_count": 227,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the complete list of listing-page URLs after index.htm was put at position 0\n",
    "url_group_tzgg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Download every listing page and save it under html_out/, mirroring the URL path\n",
    "# (e.g. /tzgg/index.htm -> html_out/tzgg/index.htm).\n",
    "for url in url_group_tzgg:\n",
    "    r = session.get(url)\n",
    "    # Fail loudly rather than saving an error page as if it were a listing page.\n",
    "    r.raise_for_status()\n",
    "    # The URL path already starts with '/', so plain concatenation gives html_out/tzgg/...\n",
    "    # (the previous 'html_out/'+path produced a doubled slash).\n",
    "    path = urllib.parse.urlparse(url).path\n",
    "    out_path = 'html_out' + path\n",
    "    # Create the target directory on first use so the cell also works in a fresh checkout.\n",
    "    os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "    with open(out_path, encoding = \"utf8\", mode = \"w\") as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 229,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/tzgg/index.htm'"
      ]
     },
     "execution_count": 229,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the path component of the first listing URL (the download loop appends it to 'html_out/')\n",
    "urllib.parse.urlparse(url_group_tzgg[0]).path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "metadata": {},
   "outputs": [],
   "source": [
    "# XPath expressions used to extract the fields of each entry on a listing page:\n",
    "dict_xpath = {\n",
    "    # link: href attribute of the anchor inside each div.news_title\n",
    "    '链接_xpath':'//div[@class=\"news_title\"]/a/@href',\n",
    "    # title: title attribute of the same anchor\n",
    "    '标题_xpath':'//div[@class=\"news_title\"]/a/@title',\n",
    "    # date: text content of each font.right-more element\n",
    "    '日期_xpath':'//font[@class=\"right-more\"]/text()'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pages_content_url(parsed, base_url=None):\n",
    "    \"\"\"Return the absolute URL of every article linked from a parsed listing page.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    parsed : object exposing .xpath()\n",
    "        Parsed listing page, e.g. the result of requests_html.soup_parse.\n",
    "    base_url : str, optional\n",
    "        URL of the listing page the hrefs are relative to. Defaults to the URL\n",
    "        held in the notebook-level nfu_urlparse.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of str\n",
    "        One absolute URL per href matched by dict_xpath['链接_xpath'].\n",
    "    \"\"\"\n",
    "    if base_url is None:\n",
    "        base_url = nfu_urlparse.geturl()\n",
    "    # urljoin resolves each href against the listing page, so bare file names, '../' segments,\n",
    "    # root-relative paths and already-absolute URLs are all handled correctly.\n",
    "    return [urllib.parse.urljoin(base_url, detail_url)\n",
    "            for detail_url in parsed.xpath(dict_xpath['链接_xpath'])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['index.htm', 'index1.htm', 'index10.htm', 'index11.htm', 'index12.htm', 'index13.htm', 'index14.htm', 'index15.htm', 'index16.htm', 'index17.htm', 'index18.htm', 'index19.htm', 'index2.htm', 'index20.htm', 'index21.htm', 'index22.htm', 'index23.htm', 'index24.htm', 'index25.htm', 'index26.htm', 'index27.htm', 'index28.htm', 'index29.htm', 'index3.htm', 'index30.htm', 'index31.htm', 'index32.htm', 'index33.htm', 'index34.htm', 'index4.htm', 'index5.htm', 'index6.htm', 'index7.htm', 'index8.htm', 'index9.htm']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>校园管理部关于2021年元旦放假校园生活服务安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/cd60e06378e5449294...</td>\n",
       "      <td>2020-12-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>中山大学南方学院关于2021年元旦放假安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/16fcbd56eab04220b3...</td>\n",
       "      <td>2020-12-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>关于开展2020年知识产权竞赛的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/155655d4a7e74c7695...</td>\n",
       "      <td>2020-12-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院关于举办2020年预防艾滋病巡讲活动的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/f381db0e5b3e4746b3...</td>\n",
       "      <td>2020-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>关于开展2020年安全知识竞赛的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/ae83ecc6ce894bcb81...</td>\n",
       "      <td>2020-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院关于举办2020～2021学年教职工体育活动的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/47ba4c0dfb1443f994...</td>\n",
       "      <td>2020-11-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>中山大学南方学院关于举办2020年体育大会的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/21cd39d341924ffd93...</td>\n",
       "      <td>2020-11-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>学校办公室关于举办2020年秋季学期校长午餐会的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/66fc5e810c664b919b...</td>\n",
       "      <td>2020-11-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>中山大学南方学院关于进一步做好疫情防控常态化校园管理工作的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/c63acd1ca67746ea93...</td>\n",
       "      <td>2020-10-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>关于国庆中秋假期学生出入校园管理的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/6e824246669d471ca2...</td>\n",
       "      <td>2020-09-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>校园管理部关于调整班车运行班次的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/df9afab7eb564161bc...</td>\n",
       "      <td>2020-09-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>中山大学南方学院关于2020年国庆节、中秋节放假安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/b57e28f6cd5a4cb7b1...</td>\n",
       "      <td>2020-09-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>中山大学南方学院关于启用电子校园卡的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/ad7da9ff14494749b7...</td>\n",
       "      <td>2020-09-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>关于开展“节约粮食从点滴做起，请跟我一起行动” 主题团日活动的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/54573ec40397435ca4...</td>\n",
       "      <td>2020-08-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>中山大学南方学院关于成立公共管理学院的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/e28b2797a45d471b88...</td>\n",
       "      <td>2020-08-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>中共中山大学南方学院委员会关于公共管理学系党总支更名的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/fc277cccd7f440a2bd...</td>\n",
       "      <td>2020-08-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>校园管理部关于2019～2020学年暑假期间校园生活服务安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/f04e18c7e60b437d8c...</td>\n",
       "      <td>2020-07-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>中山大学南方学院关于公布2020-2021学年校历的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/4df100c9d8754356a6...</td>\n",
       "      <td>2020-07-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>校园管理部关于2020年端午节假期校园生活服务安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/ed13952ecd4343d99d...</td>\n",
       "      <td>2020-06-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>校园管理部关于2020届毕业生缴纳水费的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/8eece1bbe3b34e489e...</td>\n",
       "      <td>2020-06-23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0</td>\n",
       "      <td>保卫部关于学校中区食堂南侧校道实行交通管制的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/59b8a6f87bde4424be...</td>\n",
       "      <td>2020-06-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "      <td>中山大学南方学院关于2020年端午节放假安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/72612c183e4c4b67a7...</td>\n",
       "      <td>2020-06-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2</td>\n",
       "      <td>关于近期强降水对学校造成地质灾害情况的通报</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/7ac1ab87177e44f2a9...</td>\n",
       "      <td>2020-06-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>3</td>\n",
       "      <td>校园管理部关于调整班车运行班次的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/40ed7e25c7e045ac82...</td>\n",
       "      <td>2020-06-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4</td>\n",
       "      <td>关于组织我校学生参加上半年心理健康普查的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/379e062afbda4f9197...</td>\n",
       "      <td>2020-06-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5</td>\n",
       "      <td>教务部关于调整办理学生证明证件等业务的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/e52c376f9f5b4c5d89...</td>\n",
       "      <td>2020-05-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>6</td>\n",
       "      <td>校园管理部关于2020届毕业生办理医保报销的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/38ea1d9ea083431b8d...</td>\n",
       "      <td>2020-05-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>7</td>\n",
       "      <td>校园管理部关于中区食堂教职工餐厅恢复供餐的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/7264828274f34fabbb...</td>\n",
       "      <td>2020-05-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>8</td>\n",
       "      <td>保卫部关于学生返校期间校园交通管制的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/94b1ea11046241f29f...</td>\n",
       "      <td>2020-05-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>9</td>\n",
       "      <td>中山大学南方学院关于2020年春季学期  学生返校工作安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/59041328b28b4abf9a...</td>\n",
       "      <td>2020-05-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537</th>\n",
       "      <td>17</td>\n",
       "      <td>关于临时封闭慎思路的温馨提示</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/aad413f5580f44a0a6...</td>\n",
       "      <td>2015-07-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>538</th>\n",
       "      <td>18</td>\n",
       "      <td>会计学系“财经名家讲坛”系列讲座之第二十七讲讲座通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/2a1d1cbd06634431be...</td>\n",
       "      <td>2015-06-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>539</th>\n",
       "      <td>19</td>\n",
       "      <td>关于我院在校学生参加2016年度广州市城乡居民基本医疗保险的通知（以此通知为准）</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/3a02e2fdcd7e41d59d...</td>\n",
       "      <td>2015-06-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540</th>\n",
       "      <td>0</td>\n",
       "      <td>2015年夏秋季征兵工作已拉开帷幕——致广大高校青年学生的一封信</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/5f8a4d7ef5274db1b8...</td>\n",
       "      <td>2015-06-23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541</th>\n",
       "      <td>1</td>\n",
       "      <td>关于从化汽车站返程南方学院班车路线调整的补充公告</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/b80d000f191d4b1e9d...</td>\n",
       "      <td>2015-06-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>542</th>\n",
       "      <td>2</td>\n",
       "      <td>团委关于召开中山大学南方学院第八次学生代表大会的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/16614f7d8c8042009d...</td>\n",
       "      <td>2015-06-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>3</td>\n",
       "      <td>关于第九届学生会委员候选人名单的公示</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/7210b1d263a74d5397...</td>\n",
       "      <td>2015-06-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544</th>\n",
       "      <td>4</td>\n",
       "      <td>关于举办珠江大学联盟音乐会暨中山大学南方学院音乐厅落成典礼的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/96fef1bd78794878a9...</td>\n",
       "      <td>2015-05-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>545</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院致全体学生的一封信</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/d743121a322243b181...</td>\n",
       "      <td>2015-05-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546</th>\n",
       "      <td>6</td>\n",
       "      <td>中山大学南方学院关于停水等有关情况的通报（2015年5月26日）</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/c0b86ecc3a5b441894...</td>\n",
       "      <td>2015-05-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>547</th>\n",
       "      <td>7</td>\n",
       "      <td>中山大学南方学院关于停水等有关情况的通报（2015年5月25日）</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/4d0269b7efc94294a4...</td>\n",
       "      <td>2015-05-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>548</th>\n",
       "      <td>8</td>\n",
       "      <td>中山大学南方学院关于停水情况的通报</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/8282e45d6fbc4ec4a9...</td>\n",
       "      <td>2015-05-23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549</th>\n",
       "      <td>9</td>\n",
       "      <td>学院办公室关于2015年端午放假安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/71c2e1bab7fa481c83...</td>\n",
       "      <td>2015-05-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>550</th>\n",
       "      <td>10</td>\n",
       "      <td>关于学院八期工程进场锤击桩机施工产生噪音的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/eee22820bd4445baad...</td>\n",
       "      <td>2015-05-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>551</th>\n",
       "      <td>11</td>\n",
       "      <td>中山大学南方学院关于班车运行时刻调整及交通信息的公告</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/d2ad643b0c5d4aa4ac...</td>\n",
       "      <td>2015-05-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552</th>\n",
       "      <td>12</td>\n",
       "      <td>关于组织我院2015届毕业生参加广东省 “三支一扶”招募计划的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/cda59bcea899465ba9...</td>\n",
       "      <td>2015-05-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>553</th>\n",
       "      <td>13</td>\n",
       "      <td>关于中山大学南方学院2014年度《广东省民办高校年度检查表》的公示</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/7e5b37c211ba46a483...</td>\n",
       "      <td>2015-05-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>554</th>\n",
       "      <td>14</td>\n",
       "      <td>中山大学南方学院关于开展2015年学位授予仪式主题语评选活动的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/93314a4bcf6a44d9bb...</td>\n",
       "      <td>2015-05-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>555</th>\n",
       "      <td>15</td>\n",
       "      <td>关于调整学院至中大南校区班车下车点的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/3f57418b49024903a9...</td>\n",
       "      <td>2015-04-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556</th>\n",
       "      <td>16</td>\n",
       "      <td>关于调整班车运行时刻表征求意见的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/d594c06a6c174b34b8...</td>\n",
       "      <td>2015-04-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557</th>\n",
       "      <td>17</td>\n",
       "      <td>学院交通信息公告</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/5568847ca8a84832b3...</td>\n",
       "      <td>2015-04-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558</th>\n",
       "      <td>18</td>\n",
       "      <td>会计学系“财经名家讲坛”系列讲座之第二十五讲讲座通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/095c1bab2ccf4e98b2...</td>\n",
       "      <td>2015-04-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559</th>\n",
       "      <td>19</td>\n",
       "      <td>关于2015年6月普通话水平测试的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/76e4c24581bb4a79b8...</td>\n",
       "      <td>2015-04-23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>560</th>\n",
       "      <td>0</td>\n",
       "      <td>“南苑青年”系列讲座之第十四讲的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/e5e98fafc5584ce0a6...</td>\n",
       "      <td>2015-04-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561</th>\n",
       "      <td>1</td>\n",
       "      <td>中山大学南方学院关于开通监察与审计部网站的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/cca2d0c8a29540968d...</td>\n",
       "      <td>2015-04-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562</th>\n",
       "      <td>2</td>\n",
       "      <td>“南苑青年”系列讲座之第十三讲的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/bbd14d55a99247a79f...</td>\n",
       "      <td>2015-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>563</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院关于举办“南方湖畔·艺彩纷呈”第七届校园文化艺术节活动通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/a8e5e752e409486da2...</td>\n",
       "      <td>2015-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>564</th>\n",
       "      <td>4</td>\n",
       "      <td>学院办公室关于2015年五一放假安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/e3f763049ee54cfc8c...</td>\n",
       "      <td>2015-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>565</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院关于2015年公共机构节能宣传作品征集活动的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/f3ae1aa3ccdb4d87bc...</td>\n",
       "      <td>2015-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>566</th>\n",
       "      <td>6</td>\n",
       "      <td>关于开展校园网络和运营商移动网络使用情况调查的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/6de44f6a618540ef82...</td>\n",
       "      <td>1970-01-01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>687 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index                                        标题  \\\n",
       "0        0               校园管理部关于2021年元旦放假校园生活服务安排的通知   \n",
       "1        1                  中山大学南方学院关于2021年元旦放假安排的通知   \n",
       "2        2                        关于开展2020年知识产权竞赛的通知   \n",
       "3        3             中山大学南方学院关于举办2020年预防艾滋病巡讲活动的通知   \n",
       "4        4                        关于开展2020年安全知识竞赛的通知   \n",
       "5        5         中山大学南方学院关于举办2020～2021学年教职工体育活动的通知   \n",
       "6        6                  中山大学南方学院关于举办2020年体育大会的通知   \n",
       "7        7                学校办公室关于举办2020年秋季学期校长午餐会的通知   \n",
       "8        8           中山大学南方学院关于进一步做好疫情防控常态化校园管理工作的通知   \n",
       "9        9                       关于国庆中秋假期学生出入校园管理的通知   \n",
       "10      10                        校园管理部关于调整班车运行班次的通知   \n",
       "11      11             中山大学南方学院关于2020年国庆节、中秋节放假安排的通知   \n",
       "12      12                      中山大学南方学院关于启用电子校园卡的通知   \n",
       "13      13         关于开展“节约粮食从点滴做起，请跟我一起行动” 主题团日活动的通知   \n",
       "14      14                     中山大学南方学院关于成立公共管理学院的通知   \n",
       "15      15             中共中山大学南方学院委员会关于公共管理学系党总支更名的通知   \n",
       "16      16         校园管理部关于2019～2020学年暑假期间校园生活服务安排的通知   \n",
       "17      17              中山大学南方学院关于公布2020-2021学年校历的通知   \n",
       "18      18              校园管理部关于2020年端午节假期校园生活服务安排的通知   \n",
       "19      19                    校园管理部关于2020届毕业生缴纳水费的通知   \n",
       "20       0                  保卫部关于学校中区食堂南侧校道实行交通管制的通知   \n",
       "21       1                 中山大学南方学院关于2020年端午节放假安排的通知   \n",
       "22       2                     关于近期强降水对学校造成地质灾害情况的通报   \n",
       "23       3                        校园管理部关于调整班车运行班次的通知   \n",
       "24       4                    关于组织我校学生参加上半年心理健康普查的通知   \n",
       "25       5                     教务部关于调整办理学生证明证件等业务的通知   \n",
       "26       6                  校园管理部关于2020届毕业生办理医保报销的通知   \n",
       "27       7                   校园管理部关于中区食堂教职工餐厅恢复供餐的通知   \n",
       "28       8                      保卫部关于学生返校期间校园交通管制的通知   \n",
       "29       9          中山大学南方学院关于2020年春季学期  学生返校工作安排的通知   \n",
       "..     ...                                       ...   \n",
       "537     17                            关于临时封闭慎思路的温馨提示   \n",
       "538     18                会计学系“财经名家讲坛”系列讲座之第二十七讲讲座通知   \n",
       "539     19  关于我院在校学生参加2016年度广州市城乡居民基本医疗保险的通知（以此通知为准）   \n",
       "540      0          2015年夏秋季征兵工作已拉开帷幕——致广大高校青年学生的一封信   \n",
       "541      1                  关于从化汽车站返程南方学院班车路线调整的补充公告   \n",
       "542      2                团委关于召开中山大学南方学院第八次学生代表大会的通知   \n",
       "543      3                        关于第九届学生会委员候选人名单的公示   \n",
       "544      4          关于举办珠江大学联盟音乐会暨中山大学南方学院音乐厅落成典礼的通知   \n",
       "545      5                         中山大学南方学院致全体学生的一封信   \n",
       "546      6          中山大学南方学院关于停水等有关情况的通报（2015年5月26日）   \n",
       "547      7          中山大学南方学院关于停水等有关情况的通报（2015年5月25日）   \n",
       "548      8                         中山大学南方学院关于停水情况的通报   \n",
       "549      9                     学院办公室关于2015年端午放假安排的通知   \n",
       "550     10                   关于学院八期工程进场锤击桩机施工产生噪音的通知   \n",
       "551     11                中山大学南方学院关于班车运行时刻调整及交通信息的公告   \n",
       "552     12         关于组织我院2015届毕业生参加广东省 “三支一扶”招募计划的通知   \n",
       "553     13         关于中山大学南方学院2014年度《广东省民办高校年度检查表》的公示   \n",
       "554     14         中山大学南方学院关于开展2015年学位授予仪式主题语评选活动的通知   \n",
       "555     15                      关于调整学院至中大南校区班车下车点的通知   \n",
       "556     16                        关于调整班车运行时刻表征求意见的通知   \n",
       "557     17                                  学院交通信息公告   \n",
       "558     18                会计学系“财经名家讲坛”系列讲座之第二十五讲讲座通知   \n",
       "559     19                       关于2015年6月普通话水平测试的通知   \n",
       "560      0                        “南苑青年”系列讲座之第十四讲的通知   \n",
       "561      1                   中山大学南方学院关于开通监察与审计部网站的通知   \n",
       "562      2                        “南苑青年”系列讲座之第十三讲的通知   \n",
       "563      3     中山大学南方学院关于举办“南方湖畔·艺彩纷呈”第七届校园文化艺术节活动通知   \n",
       "564      4                     学院办公室关于2015年五一放假安排的通知   \n",
       "565      5          中山大学南方学院关于2015年公共机构节能宣传作品征集活动的通知   \n",
       "566      6                 关于开展校园网络和运营商移动网络使用情况调查的通知   \n",
       "\n",
       "                                                    链结          日期  \n",
       "0    https://www.nfu.edu.cn/tzgg/cd60e06378e5449294...  2020-12-25  \n",
       "1    https://www.nfu.edu.cn/tzgg/16fcbd56eab04220b3...  2020-12-17  \n",
       "2    https://www.nfu.edu.cn/tzgg/155655d4a7e74c7695...  2020-12-16  \n",
       "3    https://www.nfu.edu.cn/tzgg/f381db0e5b3e4746b3...  2020-12-03  \n",
       "4    https://www.nfu.edu.cn/tzgg/ae83ecc6ce894bcb81...  2020-12-03  \n",
       "5    https://www.nfu.edu.cn/tzgg/47ba4c0dfb1443f994...  2020-11-25  \n",
       "6    https://www.nfu.edu.cn/tzgg/21cd39d341924ffd93...  2020-11-07  \n",
       "7    https://www.nfu.edu.cn/tzgg/66fc5e810c664b919b...  2020-11-07  \n",
       "8    https://www.nfu.edu.cn/tzgg/c63acd1ca67746ea93...  2020-10-17  \n",
       "9    https://www.nfu.edu.cn/tzgg/6e824246669d471ca2...  2020-09-25  \n",
       "10   https://www.nfu.edu.cn/tzgg/df9afab7eb564161bc...  2020-09-15  \n",
       "11   https://www.nfu.edu.cn/tzgg/b57e28f6cd5a4cb7b1...  2020-09-11  \n",
       "12   https://www.nfu.edu.cn/tzgg/ad7da9ff14494749b7...  2020-09-08  \n",
       "13   https://www.nfu.edu.cn/tzgg/54573ec40397435ca4...  2020-08-20  \n",
       "14   https://www.nfu.edu.cn/tzgg/e28b2797a45d471b88...  2020-08-05  \n",
       "15   https://www.nfu.edu.cn/tzgg/fc277cccd7f440a2bd...  2020-08-05  \n",
       "16   https://www.nfu.edu.cn/tzgg/f04e18c7e60b437d8c...  2020-07-24  \n",
       "17   https://www.nfu.edu.cn/tzgg/4df100c9d8754356a6...  2020-07-18  \n",
       "18   https://www.nfu.edu.cn/tzgg/ed13952ecd4343d99d...  2020-06-24  \n",
       "19   https://www.nfu.edu.cn/tzgg/8eece1bbe3b34e489e...  2020-06-23  \n",
       "20   https://www.nfu.edu.cn/tzgg/59b8a6f87bde4424be...  2020-06-14  \n",
       "21   https://www.nfu.edu.cn/tzgg/72612c183e4c4b67a7...  2020-06-11  \n",
       "22   https://www.nfu.edu.cn/tzgg/7ac1ab87177e44f2a9...  2020-06-09  \n",
       "23   https://www.nfu.edu.cn/tzgg/40ed7e25c7e045ac82...  2020-06-08  \n",
       "24   https://www.nfu.edu.cn/tzgg/379e062afbda4f9197...  2020-06-05  \n",
       "25   https://www.nfu.edu.cn/tzgg/e52c376f9f5b4c5d89...  2020-05-29  \n",
       "26   https://www.nfu.edu.cn/tzgg/38ea1d9ea083431b8d...  2020-05-27  \n",
       "27   https://www.nfu.edu.cn/tzgg/7264828274f34fabbb...  2020-05-24  \n",
       "28   https://www.nfu.edu.cn/tzgg/94b1ea11046241f29f...  2020-05-22  \n",
       "29   https://www.nfu.edu.cn/tzgg/59041328b28b4abf9a...  2020-05-18  \n",
       "..                                                 ...         ...  \n",
       "537  https://www.nfu.edu.cn/tzgg/aad413f5580f44a0a6...  2015-07-06  \n",
       "538  https://www.nfu.edu.cn/tzgg/2a1d1cbd06634431be...  2015-06-26  \n",
       "539  https://www.nfu.edu.cn/tzgg/3a02e2fdcd7e41d59d...  2015-06-24  \n",
       "540  https://www.nfu.edu.cn/tzgg/5f8a4d7ef5274db1b8...  2015-06-23  \n",
       "541  https://www.nfu.edu.cn/tzgg/b80d000f191d4b1e9d...  2015-06-05  \n",
       "542  https://www.nfu.edu.cn/tzgg/16614f7d8c8042009d...  2015-06-03  \n",
       "543  https://www.nfu.edu.cn/tzgg/7210b1d263a74d5397...  2015-06-03  \n",
       "544  https://www.nfu.edu.cn/tzgg/96fef1bd78794878a9...  2015-05-27  \n",
       "545  https://www.nfu.edu.cn/tzgg/d743121a322243b181...  2015-05-27  \n",
       "546  https://www.nfu.edu.cn/tzgg/c0b86ecc3a5b441894...  2015-05-26  \n",
       "547  https://www.nfu.edu.cn/tzgg/4d0269b7efc94294a4...  2015-05-25  \n",
       "548  https://www.nfu.edu.cn/tzgg/8282e45d6fbc4ec4a9...  2015-05-23  \n",
       "549  https://www.nfu.edu.cn/tzgg/71c2e1bab7fa481c83...  2015-05-20  \n",
       "550  https://www.nfu.edu.cn/tzgg/eee22820bd4445baad...  2015-05-19  \n",
       "551  https://www.nfu.edu.cn/tzgg/d2ad643b0c5d4aa4ac...  2015-05-18  \n",
       "552  https://www.nfu.edu.cn/tzgg/cda59bcea899465ba9...  2015-05-08  \n",
       "553  https://www.nfu.edu.cn/tzgg/7e5b37c211ba46a483...  2015-05-05  \n",
       "554  https://www.nfu.edu.cn/tzgg/93314a4bcf6a44d9bb...  2015-05-04  \n",
       "555  https://www.nfu.edu.cn/tzgg/3f57418b49024903a9...  2015-04-29  \n",
       "556  https://www.nfu.edu.cn/tzgg/d594c06a6c174b34b8...  2015-04-29  \n",
       "557  https://www.nfu.edu.cn/tzgg/5568847ca8a84832b3...  2015-04-28  \n",
       "558  https://www.nfu.edu.cn/tzgg/095c1bab2ccf4e98b2...  2015-04-28  \n",
       "559  https://www.nfu.edu.cn/tzgg/76e4c24581bb4a79b8...  2015-04-23  \n",
       "560  https://www.nfu.edu.cn/tzgg/e5e98fafc5584ce0a6...  2015-04-15  \n",
       "561  https://www.nfu.edu.cn/tzgg/cca2d0c8a29540968d...  2015-04-10  \n",
       "562  https://www.nfu.edu.cn/tzgg/bbd14d55a99247a79f...  2015-04-08  \n",
       "563  https://www.nfu.edu.cn/tzgg/a8e5e752e409486da2...  2015-04-07  \n",
       "564  https://www.nfu.edu.cn/tzgg/e3f763049ee54cfc8c...  2015-04-01  \n",
       "565  https://www.nfu.edu.cn/tzgg/f3ae1aa3ccdb4d87bc...  2015-04-01  \n",
       "566  https://www.nfu.edu.cn/tzgg/6de44f6a618540ef82...  1970-01-01  \n",
       "\n",
       "[687 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "list_df = []\n",
    "\n",
    "\n",
    "files= os.listdir('html_out/tzgg/')\n",
    "print(files)\n",
    "\n",
    "for html in files:\n",
    "    with open('html_out/tzgg/'+html,encoding='utf8',mode='r') as fp:\n",
    "        html_load = fp.read()\n",
    "        parsed = requests_html.soup_parse(html_load)\n",
    "        list_URL = pages_content_url(parsed)\n",
    "        \n",
    "        df = pd.DataFrame( {\n",
    "         \"标题\": parsed.xpath(dict_xpath['标题_xpath']),\n",
    "         \"链结\": list_URL,\n",
    "         \"日期\": parsed.xpath(dict_xpath['日期_xpath']),\n",
    "        } )\n",
    "        list_df.append(df)\n",
    "\n",
    "        \n",
    "        \n",
    "df_all = pd.concat(list_df).reset_index().sort_values(by='日期',ascending=False)\n",
    "display(df_all)    \n",
    "\n",
    "df_all\n",
    "\n",
    "df_all.to_excel(\"data_out/nfu_tzgg_文学与传媒学院.xlsx\", sheet_name=\"通知公告\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ！！招投标"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 259,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A1  nfu.edu.cn \n",
    "session = HTMLSession()\n",
    "r = session.get(\"https://www.nfu.edu.cn/ztb/index.htm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 260,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Element html at 0x285f8ecbe08>"
      ]
     },
     "execution_count": 260,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析\n",
    "parsed = requests_html.soup_parse(html_load)\n",
    "parsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 261,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='https', netloc='www.nfu.edu.cn', path='/ztb/index.htm', params='', query='', fragment='')"
      ]
     },
     "execution_count": 261,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析\n",
    "base_url = r.url\n",
    "nfu_urlparse = urllib.parse.urlparse(base_url)\n",
    "nfu_urlparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 262,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22\n"
     ]
    }
   ],
   "source": [
    "for i in range(1,100):\n",
    "    r = session.get('https://www.nfu.edu.cn/ztb/index'+str(i)+'.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break\n",
    "# so page = 19?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 263,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/ztb/index1.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index2.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index3.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index4.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index5.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index6.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index7.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index8.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index9.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index10.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index11.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index12.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index13.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index14.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index15.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index16.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index17.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index18.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index19.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index20.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index21.htm']"
      ]
     },
     "execution_count": 263,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group_ztb = ['https://www.nfu.edu.cn/ztb/index'+str(i)+'.htm' for i in range(1,22)]\n",
    "url_group_ztb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 264,
   "metadata": {},
   "outputs": [],
   "source": [
    "url_group_ztb.insert(0,'https://www.nfu.edu.cn/ztb/index.htm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 265,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/ztb/index.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index1.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index2.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index3.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index4.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index5.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index6.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index7.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index8.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index9.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index10.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index11.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index12.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index13.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index14.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index15.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index16.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index17.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index18.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index19.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index20.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index21.htm']"
      ]
     },
     "execution_count": 265,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group_ztb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 266,
   "metadata": {},
   "outputs": [],
   "source": [
    "for url in url_group_ztb:\n",
    "    r = session.get(url)\n",
    "#     print(r.html.html)\n",
    "    path = urllib.parse.urlparse(url).path\n",
    "    with open ('html_out/'+path, encoding = \"utf8\", mode = \"w\") as fp:\n",
    "        fp.write(r.html.html)\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 276,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/ztb/index.htm'"
      ]
     },
     "execution_count": 276,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "urllib.parse.urlparse(url_group_ztb[0]).path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 277,
   "metadata": {},
   "outputs": [],
   "source": [
    "# xpath 准备：\n",
    "dict_xpath = {\n",
    "    '链接_xpath':'//div[@class=\"news_title\"]/a/@href',\n",
    "    '标题_xpath':'//div[@class=\"news_title\"]/a/@title',\n",
    "    '日期_xpath':'//font[@class=\"right-more\"]/text()'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 278,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pages_content_url(parsed):\n",
    "    list_URL  = [urllib.parse.urlunparse\\\n",
    "                 ([nfu_urlparse.scheme,nfu_urlparse.netloc,'/'+ nfu_urlparse.path.split('/')[1] +'/' + detail_url,'','',''])\\\n",
    "                 for detail_url in parsed.xpath(dict_xpath['链接_xpath'])]\n",
    "    return list_URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 279,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['index.htm', 'index1.htm', 'index10.htm', 'index11.htm', 'index12.htm', 'index13.htm', 'index14.htm', 'index15.htm', 'index16.htm', 'index17.htm', 'index18.htm', 'index19.htm', 'index2.htm', 'index20.htm', 'index21.htm', 'index3.htm', 'index4.htm', 'index5.htm', 'index6.htm', 'index7.htm', 'index8.htm', 'index9.htm']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4aa14103a6d34d42837...</td>\n",
       "      <td>2021-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目 招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/ea8754261f26419080a...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>中山大学南方学院数字电路基础实验室、电路与模拟电子实验室设备采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/7226fe9acf3b4757b97...</td>\n",
       "      <td>2021-03-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院垃圾清运和处理服务项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/414b2db5e6c04f99be1...</td>\n",
       "      <td>2021-03-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>中山大学南方学院2021年度维修、改造工程施工项目中标结果公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/60c660848ef44283bca...</td>\n",
       "      <td>2021-03-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院校舍家电采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/c1f45c4ed6d24523b80...</td>\n",
       "      <td>2021-03-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>中山大学南方学院办公电脑采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/8de22fa69c5a4718a5d...</td>\n",
       "      <td>2021-03-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>中山大学南方学院2021年度维修、改造工程施工项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/84df006147494c74a06...</td>\n",
       "      <td>2021-01-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>中山大学南方学院2021年度维修、改造工程施工项目流标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/5b6a96bc894e4901b90...</td>\n",
       "      <td>2021-01-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>中山大学南方学院2021年度维修、改造工程施工项目招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/cee6034ea34b4d37af1...</td>\n",
       "      <td>2021-01-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>中山大学南方学院2021年度维修、改造工程施工项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/2b0efb94d7bc43a69cf...</td>\n",
       "      <td>2020-12-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>中山大学南方学院网络技术与媒体研发平台实验室设备采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4ca38f35a904483aa17...</td>\n",
       "      <td>2020-11-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>中山大学南方学院计算机组成原理实验箱采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/d9a43543bfc04b24960...</td>\n",
       "      <td>2020-11-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>中山大学南方学院2020-2021办公电脑采购项目​招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/a94be158ee2d45629fa...</td>\n",
       "      <td>2020-09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>中山大学南方学院2020-2021办公电脑采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/6c02c38297c94f82a0b...</td>\n",
       "      <td>2020-09-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>中山大学南方学院容灾自动备份系统采购项目 招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/f312609072284e91884...</td>\n",
       "      <td>2020-08-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>中山大学南方学院九期教工宿舍家具采购和安装项目中标结果公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/711839de4a50406da99...</td>\n",
       "      <td>2020-08-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>中山大学南方学院嵌入式创新实验室家具采购项目招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/ef39ea1df91b4208859...</td>\n",
       "      <td>2020-07-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>中山大学南方学院跨境电商平台系统采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/303ff597654847ad9fe...</td>\n",
       "      <td>2020-07-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "      <td>中山大学南方学院嵌入式创新实验室家具采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/e71e4eac29194915ac3...</td>\n",
       "      <td>2020-07-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0</td>\n",
       "      <td>中山大学南方学院九期教工宿舍窗帘采购安装项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/bb5be88f7b7f424dab2...</td>\n",
       "      <td>2020-07-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>中山大学南方学院九期教工宿舍卫浴设施采购和安装项目招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/3c3b1ca74f0b47e1a6f...</td>\n",
       "      <td>2020-07-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2</td>\n",
       "      <td>中山大学南方学院九期教工宿舍卫浴设施采购和安装项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/8ee35c7b7d094cbc84a...</td>\n",
       "      <td>2020-07-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院九期教工宿舍家具采购和安装项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4627d138819f481c903...</td>\n",
       "      <td>2020-07-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4</td>\n",
       "      <td>中山大学南方学院数字技术平台实验室电脑设备采购项目中标结果公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/9aa6d2284a084531966...</td>\n",
       "      <td>2020-06-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院数字技术平台实验室电脑设备采购项目招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/f52f584b59e6449090d...</td>\n",
       "      <td>2020-05-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>6</td>\n",
       "      <td>中山大学南方学院嵌入式实验箱设备采购项目招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/38cb8c6d0c0a43a2a96...</td>\n",
       "      <td>2020-05-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>7</td>\n",
       "      <td>中山大学南方学院嵌入式实验箱设备采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/f392dcdb069a43e2b0d...</td>\n",
       "      <td>2020-05-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>8</td>\n",
       "      <td>中山大学南方学院数字技术平台实验室电脑设备采购项目招标公告（第二次）</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/84f0c06428404d469df...</td>\n",
       "      <td>2020-05-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>9</td>\n",
       "      <td>中山大学南方学院新建计算机基础实验室电脑采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/80d37f771060414e966...</td>\n",
       "      <td>2020-04-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>264</th>\n",
       "      <td>4</td>\n",
       "      <td>中山大学南方学院电气工程及其自动化专业实验室设备采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/0ca396abac75466787a...</td>\n",
       "      <td>2015-06-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>265</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院电气工程及其自动化专业实验室设备采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/2eed1ce5d94b4676bac...</td>\n",
       "      <td>2015-05-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>6</td>\n",
       "      <td>中山大学南方学院经管实验中心二期采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/a54939eb7b854d409e9...</td>\n",
       "      <td>2015-05-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>267</th>\n",
       "      <td>7</td>\n",
       "      <td>中山大学南方学院档案室档案存储密集架采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4ab47e3b5d194a59b0c...</td>\n",
       "      <td>2015-05-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>268</th>\n",
       "      <td>8</td>\n",
       "      <td>中山大学南方学院招待所改造设计采购项目招标公告(延期)</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/2286f0b0e088428187d...</td>\n",
       "      <td>2015-05-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269</th>\n",
       "      <td>9</td>\n",
       "      <td>中山大学南方学院白蚁红火蚁防治采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/6647633aa30142cb8ae...</td>\n",
       "      <td>2015-05-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>270</th>\n",
       "      <td>10</td>\n",
       "      <td>中山大学南方学院虚拟演播室采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/6bcc896bbc2d4f06969...</td>\n",
       "      <td>2015-05-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>271</th>\n",
       "      <td>11</td>\n",
       "      <td>中山大学南方学院白蚁红火蚁防治</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/1c90e2102e6a4b32928...</td>\n",
       "      <td>2015-05-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>12</td>\n",
       "      <td>中山大学南方学院虚拟演播室采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/50ee3e4ce8ba4099874...</td>\n",
       "      <td>2015-05-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>13</td>\n",
       "      <td>中山大学南方学院经管实验中心二期硬件设备采购项目招标公告（延期）</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/95f0d988ed984ff189c...</td>\n",
       "      <td>2015-05-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>14</td>\n",
       "      <td>中山大学南方学院招待所改造工程设计项目招标更改及答疑</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/b9fca81e1a394a39a55...</td>\n",
       "      <td>2015-05-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>16</td>\n",
       "      <td>中山大学南方学院招待所改造设计采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/b74775af0f294621964...</td>\n",
       "      <td>2015-05-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>15</td>\n",
       "      <td>中山大学南方学院人体解剖实验室采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/44ef936e27cb4456b2e...</td>\n",
       "      <td>2015-05-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>277</th>\n",
       "      <td>17</td>\n",
       "      <td>中山大学南方学院视频监控系统项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/26e7011c300e44d295c...</td>\n",
       "      <td>2015-05-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>278</th>\n",
       "      <td>18</td>\n",
       "      <td>中山大学南方学院人体解剖实验室设备采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/d64d62048b704ec4b57...</td>\n",
       "      <td>2015-04-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>279</th>\n",
       "      <td>19</td>\n",
       "      <td>中山大学南方学院学生体质健康测试仪采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/aebfa587e06a4609a6d...</td>\n",
       "      <td>2015-04-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>280</th>\n",
       "      <td>0</td>\n",
       "      <td>中山大学南方学院学生体质健康测试仪采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/94ed4b33bb574655bb3...</td>\n",
       "      <td>2015-04-23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>281</th>\n",
       "      <td>1</td>\n",
       "      <td>中山大学南方学院虚拟演播室设备采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/2abe500eeb194ba28dd...</td>\n",
       "      <td>2015-04-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>282</th>\n",
       "      <td>2</td>\n",
       "      <td>中山大学南方学院经管实验中心二期硬件设备采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4aced3639e614ce1a5c...</td>\n",
       "      <td>2015-04-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>283</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院校园</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/17a10312fd514d89b20...</td>\n",
       "      <td>2015-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>284</th>\n",
       "      <td>4</td>\n",
       "      <td>中山大学南方学院语音实验室及同声传译实验室设备采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/261e9468d6ff4f9b86f...</td>\n",
       "      <td>2015-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>285</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院经管实验中心及语音实验室设备采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4a01f1f066e346cb804...</td>\n",
       "      <td>2015-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>286</th>\n",
       "      <td>6</td>\n",
       "      <td>中山大学南方学院语音实验室及同声传译实验室设备采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/84828ddf8fab4c1ebaf...</td>\n",
       "      <td>2015-04-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>287</th>\n",
       "      <td>7</td>\n",
       "      <td>中山大学南方学院音乐系阶梯课室座椅采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/ffd68b780f96460197f...</td>\n",
       "      <td>2015-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>288</th>\n",
       "      <td>8</td>\n",
       "      <td>中山大学南方学院计算机实验室设备采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/c138756c60844f7f9da...</td>\n",
       "      <td>2015-03-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>289</th>\n",
       "      <td>9</td>\n",
       "      <td>中山大学南方学院学生体质健康测试仪采购项目招标公告（第二次）</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/ba01c43761e245d4937...</td>\n",
       "      <td>2015-03-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>290</th>\n",
       "      <td>10</td>\n",
       "      <td>中山大学南方学院计算机实验室设备采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/0020f85b9ef24d0792d...</td>\n",
       "      <td>2015-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291</th>\n",
       "      <td>11</td>\n",
       "      <td>中山大学南方学院电气工程及自动化实验室设备采购项目招标公告（第二次）</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/10482a669fc54447aa2...</td>\n",
       "      <td>2015-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>292</th>\n",
       "      <td>12</td>\n",
       "      <td>中山大学南方学院音乐楼阶梯课室座椅采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4e5e67a17b7d47cf8cc...</td>\n",
       "      <td>2015-03-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>293</th>\n",
       "      <td>13</td>\n",
       "      <td>中山大学南方学院室内高尔夫模拟设备项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/35a1b4dab36a4ae5aa4...</td>\n",
       "      <td>2013-12-23</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>434 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index                                      标题  \\\n",
       "0        0     广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目招标开标延期公告   \n",
       "1        1        广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目 招标公告   \n",
       "2        2  中山大学南方学院数字电路基础实验室、电路与模拟电子实验室设备采购项目招标公告   \n",
       "3        3                 中山大学南方学院垃圾清运和处理服务项目招标公告   \n",
       "4        4         中山大学南方学院2021年度维修、改造工程施工项目中标结果公示   \n",
       "5        5                    中山大学南方学院校舍家电采购项目招标公告   \n",
       "6        6                    中山大学南方学院办公电脑采购项目招标公告   \n",
       "7        7           中山大学南方学院2021年度维修、改造工程施工项目招标公告   \n",
       "8        8           中山大学南方学院2021年度维修、改造工程施工项目流标公告   \n",
       "9        9       中山大学南方学院2021年度维修、改造工程施工项目招标开标延期公告   \n",
       "10      10           中山大学南方学院2021年度维修、改造工程施工项目招标公告   \n",
       "11      11        中山大学南方学院网络技术与媒体研发平台实验室设备采购项目招标公告   \n",
       "12      12              中山大学南方学院计算机组成原理实验箱采购项目招标公告   \n",
       "13      13      中山大学南方学院2020-2021办公电脑采购项目​招标开标延期公告   \n",
       "14      14           中山大学南方学院2020-2021办公电脑采购项目招标公告   \n",
       "15      15               中山大学南方学院容灾自动备份系统采购项目 招标公告   \n",
       "16      16           中山大学南方学院九期教工宿舍家具采购和安装项目中标结果公示   \n",
       "17      17          中山大学南方学院嵌入式创新实验室家具采购项目招标开标延期公告   \n",
       "18      18                中山大学南方学院跨境电商平台系统采购项目招标公告   \n",
       "21       1              中山大学南方学院嵌入式创新实验室家具采购项目招标公告   \n",
       "20       0              中山大学南方学院九期教工宿舍窗帘采购安装项目招标公告   \n",
       "19      19       中山大学南方学院九期教工宿舍卫浴设施采购和安装项目招标开标延期公告   \n",
       "22       2           中山大学南方学院九期教工宿舍卫浴设施采购和安装项目招标公告   \n",
       "23       3             中山大学南方学院九期教工宿舍家具采购和安装项目招标公告   \n",
       "24       4         中山大学南方学院数字技术平台实验室电脑设备采购项目中标结果公示   \n",
       "25       5       中山大学南方学院数字技术平台实验室电脑设备采购项目招标开标延期公告   \n",
       "26       6            中山大学南方学院嵌入式实验箱设备采购项目招标开标延期公告   \n",
       "27       7                中山大学南方学院嵌入式实验箱设备采购项目招标公告   \n",
       "28       8      中山大学南方学院数字技术平台实验室电脑设备采购项目招标公告（第二次）   \n",
       "29       9            中山大学南方学院新建计算机基础实验室电脑采购项目招标公告   \n",
       "..     ...                                     ...   \n",
       "264      4  中山大学南方学院电气工程及其自动化专业实验室设备采购项目中标（成交）结果公告   \n",
       "265      5        中山大学南方学院电气工程及其自动化专业实验室设备采购项目中标公示   \n",
       "266      6                中山大学南方学院经管实验中心二期采购项目中标公示   \n",
       "267      7              中山大学南方学院档案室档案存储密集架采购项目招标公告   \n",
       "268      8             中山大学南方学院招待所改造设计采购项目招标公告(延期)   \n",
       "269      9           中山大学南方学院白蚁红火蚁防治采购项目中标（成交）结果公告   \n",
       "270     10             中山大学南方学院虚拟演播室采购项目中标（成交）结果公告   \n",
       "271     11                         中山大学南方学院白蚁红火蚁防治   \n",
       "272     12                   中山大学南方学院虚拟演播室采购项目中标公示   \n",
       "273     13        中山大学南方学院经管实验中心二期硬件设备采购项目招标公告（延期）   \n",
       "274     14              中山大学南方学院招待所改造工程设计项目招标更改及答疑   \n",
       "276     16                 中山大学南方学院招待所改造设计采购项目招标公告   \n",
       "275     15           中山大学南方学院人体解剖实验室采购项目中标（成交）结果公告   \n",
       "277     17                    中山大学南方学院视频监控系统项目中标公示   \n",
       "278     18               中山大学南方学院人体解剖实验室设备采购项目中标公示   \n",
       "279     19         中山大学南方学院学生体质健康测试仪采购项目中标（成交）结果公告   \n",
       "280      0               中山大学南方学院学生体质健康测试仪采购项目中标公示   \n",
       "281      1                 中山大学南方学院虚拟演播室设备采购项目招标公告   \n",
       "282      2            中山大学南方学院经管实验中心二期硬件设备采购项目招标公告   \n",
       "283      3                              中山大学南方学院校园   \n",
       "284      4   中山大学南方学院语音实验室及同声传译实验室设备采购项目中标（成交）结果公告   \n",
       "285      5          中山大学南方学院经管实验中心及语音实验室设备采购项目中标公示   \n",
       "286      6         中山大学南方学院语音实验室及同声传译实验室设备采购项目中标公示   \n",
       "287      7         中山大学南方学院音乐系阶梯课室座椅采购项目中标（成交）结果公告   \n",
       "288      8          中山大学南方学院计算机实验室设备采购项目中标（成交）结果公告   \n",
       "289      9          中山大学南方学院学生体质健康测试仪采购项目招标公告（第二次）   \n",
       "290     10                中山大学南方学院计算机实验室设备采购项目中标公示   \n",
       "291     11      中山大学南方学院电气工程及自动化实验室设备采购项目招标公告（第二次）   \n",
       "292     12               中山大学南方学院音乐楼阶梯课室座椅采购项目中标公示   \n",
       "293     13                 中山大学南方学院室内高尔夫模拟设备项目招标公告   \n",
       "\n",
       "                                                    链结          日期  \n",
       "0    https://www.nfu.edu.cn/ztb/4aa14103a6d34d42837...  2021-04-08  \n",
       "1    https://www.nfu.edu.cn/ztb/ea8754261f26419080a...  2021-04-02  \n",
       "2    https://www.nfu.edu.cn/ztb/7226fe9acf3b4757b97...  2021-03-31  \n",
       "3    https://www.nfu.edu.cn/ztb/414b2db5e6c04f99be1...  2021-03-17  \n",
       "4    https://www.nfu.edu.cn/ztb/60c660848ef44283bca...  2021-03-11  \n",
       "5    https://www.nfu.edu.cn/ztb/c1f45c4ed6d24523b80...  2021-03-05  \n",
       "6    https://www.nfu.edu.cn/ztb/8de22fa69c5a4718a5d...  2021-03-03  \n",
       "7    https://www.nfu.edu.cn/ztb/84df006147494c74a06...  2021-01-26  \n",
       "8    https://www.nfu.edu.cn/ztb/5b6a96bc894e4901b90...  2021-01-25  \n",
       "9    https://www.nfu.edu.cn/ztb/cee6034ea34b4d37af1...  2021-01-05  \n",
       "10   https://www.nfu.edu.cn/ztb/2b0efb94d7bc43a69cf...  2020-12-11  \n",
       "11   https://www.nfu.edu.cn/ztb/4ca38f35a904483aa17...  2020-11-18  \n",
       "12   https://www.nfu.edu.cn/ztb/d9a43543bfc04b24960...  2020-11-13  \n",
       "13   https://www.nfu.edu.cn/ztb/a94be158ee2d45629fa...  2020-09-03  \n",
       "14   https://www.nfu.edu.cn/ztb/6c02c38297c94f82a0b...  2020-09-01  \n",
       "15   https://www.nfu.edu.cn/ztb/f312609072284e91884...  2020-08-15  \n",
       "16   https://www.nfu.edu.cn/ztb/711839de4a50406da99...  2020-08-14  \n",
       "17   https://www.nfu.edu.cn/ztb/ef39ea1df91b4208859...  2020-07-24  \n",
       "18   https://www.nfu.edu.cn/ztb/303ff597654847ad9fe...  2020-07-24  \n",
       "21   https://www.nfu.edu.cn/ztb/e71e4eac29194915ac3...  2020-07-17  \n",
       "20   https://www.nfu.edu.cn/ztb/bb5be88f7b7f424dab2...  2020-07-17  \n",
       "19   https://www.nfu.edu.cn/ztb/3c3b1ca74f0b47e1a6f...  2020-07-17  \n",
       "22   https://www.nfu.edu.cn/ztb/8ee35c7b7d094cbc84a...  2020-07-12  \n",
       "23   https://www.nfu.edu.cn/ztb/4627d138819f481c903...  2020-07-01  \n",
       "24   https://www.nfu.edu.cn/ztb/9aa6d2284a084531966...  2020-06-02  \n",
       "25   https://www.nfu.edu.cn/ztb/f52f584b59e6449090d...  2020-05-25  \n",
       "26   https://www.nfu.edu.cn/ztb/38cb8c6d0c0a43a2a96...  2020-05-25  \n",
       "27   https://www.nfu.edu.cn/ztb/f392dcdb069a43e2b0d...  2020-05-22  \n",
       "28   https://www.nfu.edu.cn/ztb/84f0c06428404d469df...  2020-05-15  \n",
       "29   https://www.nfu.edu.cn/ztb/80d37f771060414e966...  2020-04-30  \n",
       "..                                                 ...         ...  \n",
       "264  https://www.nfu.edu.cn/ztb/0ca396abac75466787a...  2015-06-02  \n",
       "265  https://www.nfu.edu.cn/ztb/2eed1ce5d94b4676bac...  2015-05-29  \n",
       "266  https://www.nfu.edu.cn/ztb/a54939eb7b854d409e9...  2015-05-29  \n",
       "267  https://www.nfu.edu.cn/ztb/4ab47e3b5d194a59b0c...  2015-05-25  \n",
       "268  https://www.nfu.edu.cn/ztb/2286f0b0e088428187d...  2015-05-25  \n",
       "269  https://www.nfu.edu.cn/ztb/6647633aa30142cb8ae...  2015-05-18  \n",
       "270  https://www.nfu.edu.cn/ztb/6bcc896bbc2d4f06969...  2015-05-18  \n",
       "271  https://www.nfu.edu.cn/ztb/1c90e2102e6a4b32928...  2015-05-12  \n",
       "272  https://www.nfu.edu.cn/ztb/50ee3e4ce8ba4099874...  2015-05-12  \n",
       "273  https://www.nfu.edu.cn/ztb/95f0d988ed984ff189c...  2015-05-06  \n",
       "274  https://www.nfu.edu.cn/ztb/b9fca81e1a394a39a55...  2015-05-05  \n",
       "276  https://www.nfu.edu.cn/ztb/b74775af0f294621964...  2015-05-04  \n",
       "275  https://www.nfu.edu.cn/ztb/44ef936e27cb4456b2e...  2015-05-04  \n",
       "277  https://www.nfu.edu.cn/ztb/26e7011c300e44d295c...  2015-05-03  \n",
       "278  https://www.nfu.edu.cn/ztb/d64d62048b704ec4b57...  2015-04-28  \n",
       "279  https://www.nfu.edu.cn/ztb/aebfa587e06a4609a6d...  2015-04-27  \n",
       "280  https://www.nfu.edu.cn/ztb/94ed4b33bb574655bb3...  2015-04-23  \n",
       "281  https://www.nfu.edu.cn/ztb/2abe500eeb194ba28dd...  2015-04-16  \n",
       "282  https://www.nfu.edu.cn/ztb/4aced3639e614ce1a5c...  2015-04-16  \n",
       "283  https://www.nfu.edu.cn/ztb/17a10312fd514d89b20...  2015-04-09  \n",
       "284  https://www.nfu.edu.cn/ztb/261e9468d6ff4f9b86f...  2015-04-07  \n",
       "285  https://www.nfu.edu.cn/ztb/4a01f1f066e346cb804...  2015-04-07  \n",
       "286  https://www.nfu.edu.cn/ztb/84828ddf8fab4c1ebaf...  2015-04-03  \n",
       "287  https://www.nfu.edu.cn/ztb/ffd68b780f96460197f...  2015-04-02  \n",
       "288  https://www.nfu.edu.cn/ztb/c138756c60844f7f9da...  2015-03-30  \n",
       "289  https://www.nfu.edu.cn/ztb/ba01c43761e245d4937...  2015-03-27  \n",
       "290  https://www.nfu.edu.cn/ztb/0020f85b9ef24d0792d...  2015-03-26  \n",
       "291  https://www.nfu.edu.cn/ztb/10482a669fc54447aa2...  2015-03-26  \n",
       "292  https://www.nfu.edu.cn/ztb/4e5e67a17b7d47cf8cc...  2015-03-20  \n",
       "293  https://www.nfu.edu.cn/ztb/35a1b4dab36a4ae5aa4...  2013-12-23  \n",
       "\n",
       "[434 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "list_df = []\n",
    "\n",
    "\n",
    "files= os.listdir('html_out/ztb/')\n",
    "print(files)\n",
    "\n",
    "for html in files:\n",
    "    with open('html_out/ztb/'+html,encoding='utf8',mode='r') as fp:\n",
    "        html_load = fp.read()\n",
    "        parsed = requests_html.soup_parse(html_load)\n",
    "        list_URL = pages_content_url(parsed)\n",
    "        \n",
    "        df = pd.DataFrame( {\n",
    "         \"标题\": parsed.xpath(dict_xpath['标题_xpath']),\n",
    "         \"链结\": list_URL,\n",
    "         \"日期\": parsed.xpath(dict_xpath['日期_xpath']),\n",
    "        } )\n",
    "        list_df.append(df)\n",
    "\n",
    "        \n",
    "        \n",
    "df_all = pd.concat(list_df).reset_index().sort_values(by='日期',ascending=False)\n",
    "display(df_all)    \n",
    "\n",
    "df_all\n",
    "\n",
    "df_all.to_excel(\"data_out/nfu_ztb_文学与传媒学院.xlsx\", sheet_name=\"招投标\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ！！高教动态"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A1  nfu.edu.cn \n",
    "session = HTMLSession()\n",
    "r = session.get(\"https://www.nfu.edu.cn/gjdt/index.htm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Element html at 0x285fcb8de58>"
      ]
     },
     "execution_count": 291,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析\n",
    "parsed = requests_html.soup_parse(html_load)\n",
    "parsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='https', netloc='www.nfu.edu.cn', path='/gjdt/index.htm', params='', query='', fragment='')"
      ]
     },
     "execution_count": 292,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析\n",
    "base_url = r.url\n",
    "nfu_urlparse = urllib.parse.urlparse(base_url)\n",
    "nfu_urlparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26\n"
     ]
    }
   ],
   "source": [
    "for i in range(1,100):\n",
    "    r = session.get('https://www.nfu.edu.cn/gjdt/index'+str(i)+'.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break\n",
    "# so page = 19?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 294,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/gjdt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index25.htm']"
      ]
     },
     "execution_count": 294,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group_gjdt = ['https://www.nfu.edu.cn/gjdt/index'+str(i)+'.htm' for i in range(1,26)]\n",
    "url_group_gjdt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 295,
   "metadata": {},
   "outputs": [],
   "source": [
    "url_group_gjdt.insert(0,'https://www.nfu.edu.cn/gjdt/index.htm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/gjdt/index.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index25.htm']"
      ]
     },
     "execution_count": 296,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group_gjdt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {},
   "outputs": [],
   "source": [
    "for url in url_group_gjdt:\n",
    "    r = session.get(url)\n",
    "#     print(r.html.html)\n",
    "    path = urllib.parse.urlparse(url).path\n",
    "    with open ('html_out/'+path, encoding = \"utf8\", mode = \"w\") as fp:\n",
    "        fp.write(r.html.html)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/gjdt/index.htm'"
      ]
     },
     "execution_count": 300,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "urllib.parse.urlparse(url_group_gjdt[0]).path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "metadata": {},
   "outputs": [],
   "source": [
    "# xpath 准备：\n",
    "dict_xpath = {\n",
    "    '链接_xpath':'//div[@class=\"news_title\"]/a/@href',\n",
    "    '标题_xpath':'//div[@class=\"news_title\"]/a/@title',\n",
    "    '日期_xpath':'//font[@class=\"right-more\"]/text()'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 302,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pages_content_url(parsed):\n",
    "    list_URL  = [urllib.parse.urlunparse\\\n",
    "                 ([nfu_urlparse.scheme,nfu_urlparse.netloc,'/'+ nfu_urlparse.path.split('/')[1] +'/' + detail_url,'','',''])\\\n",
    "                 for detail_url in parsed.xpath(dict_xpath['链接_xpath'])]\n",
    "    return list_URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['index.htm', 'index1.htm', 'index10.htm', 'index11.htm', 'index12.htm', 'index13.htm', 'index14.htm', 'index15.htm', 'index16.htm', 'index17.htm', 'index18.htm', 'index19.htm', 'index2.htm', 'index20.htm', 'index21.htm', 'index22.htm', 'index23.htm', 'index24.htm', 'index25.htm', 'index3.htm', 'index4.htm', 'index5.htm', 'index6.htm', 'index7.htm', 'index8.htm', 'index9.htm']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>教育部党组《求是》撰文：精心谋划 切实抓好教育系统党史学习教育</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/309be8b078444044b5...</td>\n",
       "      <td>2021-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>教育部长陈宝生：把巩固拓展作为开局之年工作主题，做到6个到位</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/159b20971f8b4051ba...</td>\n",
       "      <td>2021-03-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>如何建设高质量教育体系？“十四五”规划和2035年远景目标纲要明确了</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/27ba495edc1b49f88b...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>教育部长陈宝生《旗帜》撰文：建设高质量教育体系，加快建成教育强国</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/20dc120c250642cca5...</td>\n",
       "      <td>2021-01-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>重磅！《推进粤港澳大湾区高等教育合作发展规划》正式印发</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b43531427fb44695bb...</td>\n",
       "      <td>2020-12-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>教育部长陈宝生：大力提升青少年宪法法治教育质量</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/1509f4f3bc2f4babbe...</td>\n",
       "      <td>2020-12-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>教育系统如何学习贯彻五中全会精神？教育部最新通知来了</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/a4b2fb3dacae456497...</td>\n",
       "      <td>2020-11-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>教育部长陈宝生撰文：建设高质量教育体系  五中全会深解读</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/0e7d664c116f4a0ab9...</td>\n",
       "      <td>2020-11-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>教育部长陈宝生：教育系统要找准学习领会切入点与维度，做到6个“理解”|五中全会大学习</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/71e152ddce12414388...</td>\n",
       "      <td>2020-11-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>教育战线如何贯彻落实五中全会精神？教育部最新部署来了</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/0d7bfc95f70841a6b5...</td>\n",
       "      <td>2020-11-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>如何深化新时代教育评价改革？教育部11问答详解（附一图看懂）</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/750c396e278446e687...</td>\n",
       "      <td>2020-10-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>中共中央 国务院印发《深化新时代教育评价改革总体方案》</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/cd3ecf8986ad40e991...</td>\n",
       "      <td>2020-10-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>重磅！中办、国办印发《关于全面加强和改进新时代学校体育工作的意见》和《关于全面加强和改进新时...</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/3a7835bded2441aeb6...</td>\n",
       "      <td>2020-10-21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>钟秉林、张志勇、沈炜解读：教育评价“指挥棒”全面转向！</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/4d0eb3a8b8ee47f6b9...</td>\n",
       "      <td>2020-10-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>教育部党组：开启全面建设高素质专业化创新型教师队伍新征程</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/355e3d6207974a3ea6...</td>\n",
       "      <td>2020-10-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>教育部长陈宝生《求是》撰文：新时代建设教育强国的根本指针</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/9b6486d83f454a2ca7...</td>\n",
       "      <td>2020-09-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>如何学习贯彻习近平总书记教师节重要寄语？教育部最新通知来了</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/6bb7172f46b3458b80...</td>\n",
       "      <td>2020-09-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>习近平主持召开科学家座谈会强调不断向科学技术广度和深度进军</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/a8f6f2c6a2c644d299...</td>\n",
       "      <td>2020-09-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>《求是》杂志发表习近平总书记重要文章</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/f978598b61024bb298...</td>\n",
       "      <td>2020-08-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>来听！教育部党组成员这样讲专题党课</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/3d9e9dfac159459386...</td>\n",
       "      <td>2020-08-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0</td>\n",
       "      <td>10项要求！教育部发文，进一步加强高校法治工作</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/1c9574356724407990...</td>\n",
       "      <td>2020-08-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "      <td>教育部长陈宝生：把教育系统党的政治建设引向深入</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/64834a92847843d1ae...</td>\n",
       "      <td>2020-07-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2</td>\n",
       "      <td>如何把新时代大中小学劳动教育落到实处？教育部9问答详解</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/e5f4a1a9e8ae4e01b6...</td>\n",
       "      <td>2020-07-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>3</td>\n",
       "      <td>速递！教育部印发《大中小学劳动教育指导纲要（试行）》</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b889fc2138324a5bab...</td>\n",
       "      <td>2020-07-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4</td>\n",
       "      <td>《人民日报》专访教育部总督学郑富芝：八方面加强督导 四举措问责到人</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/516ba03e05ce411fa9...</td>\n",
       "      <td>2020-07-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5</td>\n",
       "      <td>速递！教育部等8部门全面部署加快和扩大新时代教育对外开放</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/2980f80ba0e44882b6...</td>\n",
       "      <td>2020-06-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>6</td>\n",
       "      <td>教育部长陈宝生：教育战线要重点瞄准三大目标，确保“收官之年”圆满收官</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/a3b10f27e6c645c9b5...</td>\n",
       "      <td>2020-06-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>7</td>\n",
       "      <td>教育部长陈宝生：抓准抓实，全面推进高校课程思政建设取得实效</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/ddbb9af58dd14771a0...</td>\n",
       "      <td>2020-06-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>8</td>\n",
       "      <td>如何全面推进高校课程思政建设？教育部8问答详解</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b2d748f255db442d86...</td>\n",
       "      <td>2020-06-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>9</td>\n",
       "      <td>教育部印发《高等学校课程思政建设指导纲要》，全面推进高校课程思政建设</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/e36ae0e40096485c95...</td>\n",
       "      <td>2020-06-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>341</th>\n",
       "      <td>1</td>\n",
       "      <td>调查显示：过半应届生求职仍扎堆“北上广深”</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b2058238570e48e496...</td>\n",
       "      <td>2014-05-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>342</th>\n",
       "      <td>2</td>\n",
       "      <td>高端海归近5年回国3万 人数是前30年的3倍</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/53f23d6dcf2c4764a6...</td>\n",
       "      <td>2014-05-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>343</th>\n",
       "      <td>3</td>\n",
       "      <td>习近平：青年要自觉践行社会主义核心价值观 ——在北京大学师生座谈会上的讲话</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/38795d5e83b34707bb...</td>\n",
       "      <td>2014-05-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>344</th>\n",
       "      <td>4</td>\n",
       "      <td>大学毕业人数破700万 “隐性就业”能否避免“毕业即失业”</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/4bcbfd4a23684b1281...</td>\n",
       "      <td>2014-05-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>345</th>\n",
       "      <td>5</td>\n",
       "      <td>中国高教界“大腕”聚昆共涉教改“深水区”</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/62e82445c20b40f5a0...</td>\n",
       "      <td>2014-04-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>347</th>\n",
       "      <td>7</td>\n",
       "      <td>调查显示八成应届生就业观求职中“生变”</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/401b84f616944f559d...</td>\n",
       "      <td>2014-04-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>346</th>\n",
       "      <td>6</td>\n",
       "      <td>广东为2.8万名贫困生张罗招聘会</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/a9f067fdd36a4abcb7...</td>\n",
       "      <td>2014-04-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>348</th>\n",
       "      <td>8</td>\n",
       "      <td>今年高校毕业生就业规模将达727万人 比去年增28万</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b77384a898e444f19c...</td>\n",
       "      <td>2014-04-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>349</th>\n",
       "      <td>9</td>\n",
       "      <td>大学生上课，费脑子还是费流量？</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/dbd812f735e24e6cb9...</td>\n",
       "      <td>2014-04-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>10</td>\n",
       "      <td>中国掀起“慕课”热潮重塑大学课堂</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/a2ea0bade0dc4ac0b3...</td>\n",
       "      <td>2014-04-21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>351</th>\n",
       "      <td>11</td>\n",
       "      <td>大学生创业享受优惠政策须持两证</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/ee0cac181e8240cead...</td>\n",
       "      <td>2014-04-21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>352</th>\n",
       "      <td>12</td>\n",
       "      <td>中国重点高校扩大贫困地区招生比例助农村学生圆梦</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/9e113621b1bd444da9...</td>\n",
       "      <td>2014-04-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>353</th>\n",
       "      <td>13</td>\n",
       "      <td>教育部部署2014年高等学校科技改革重点工作</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b9331db1468b4c2783...</td>\n",
       "      <td>2014-04-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>354</th>\n",
       "      <td>14</td>\n",
       "      <td>高等教育国际化须警惕过度商业化</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/e2a83d35ce63437e81...</td>\n",
       "      <td>2014-04-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>355</th>\n",
       "      <td>15</td>\n",
       "      <td>广东出台“特支计划”重点遴选培养杰出人才</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/a10bea05e10f4940b4...</td>\n",
       "      <td>2014-04-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>356</th>\n",
       "      <td>16</td>\n",
       "      <td>75所高校毕业生就业质量报告：须完善规范和标准</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/da3b772713c34cb084...</td>\n",
       "      <td>2014-04-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>357</th>\n",
       "      <td>17</td>\n",
       "      <td>分类、开放将成中国高校科研评价方向</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/fcfca6a4203f4b5586...</td>\n",
       "      <td>2014-04-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>358</th>\n",
       "      <td>18</td>\n",
       "      <td>北京6年投1.5亿元鼓励大学生科研创新</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/99099804ea794a3386...</td>\n",
       "      <td>2014-04-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>359</th>\n",
       "      <td>19</td>\n",
       "      <td>就业决定招生：中国高校破解“就业难”的新尝试</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/46c2eec344ae4626ac...</td>\n",
       "      <td>2014-04-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360</th>\n",
       "      <td>0</td>\n",
       "      <td>17高校面向内地招生 “港校北上潮”持续升温</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/48ecc7287ed14d3987...</td>\n",
       "      <td>2014-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>361</th>\n",
       "      <td>1</td>\n",
       "      <td>浙江将在高校推广“一站式”学生事务中心模式</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/870f9717698b45eb92...</td>\n",
       "      <td>2014-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>362</th>\n",
       "      <td>2</td>\n",
       "      <td>中国高校研究生面临更高毕业“门槛”</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b725709fcdc14df0b7...</td>\n",
       "      <td>2014-04-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>364</th>\n",
       "      <td>4</td>\n",
       "      <td>聚焦“就业季”：“先上大学后选专业”怎么样？</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/cff97426f8fe472882...</td>\n",
       "      <td>2014-04-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>363</th>\n",
       "      <td>3</td>\n",
       "      <td>75所教育部直属高校发布毕业生就业质量年度报告</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/94bad9400bf34913b4...</td>\n",
       "      <td>2014-04-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>365</th>\n",
       "      <td>5</td>\n",
       "      <td>国家重点学科审批取消 没了指挥棒高校怎么办</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/ebd7084c98754523b0...</td>\n",
       "      <td>2014-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366</th>\n",
       "      <td>6</td>\n",
       "      <td>广东省教育厅：今年毕业生就业形势比去年好</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/3829e4c5df9e460abc...</td>\n",
       "      <td>2014-03-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>367</th>\n",
       "      <td>7</td>\n",
       "      <td>要求职业“高大上” 高校毕业生择业扎堆致就业难</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/776ebc41fae84b36a4...</td>\n",
       "      <td>2014-03-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>368</th>\n",
       "      <td>8</td>\n",
       "      <td>教育部：预计今年贫困地区农村学生上重点高校的人数将比去年增加10%以上</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/41d339ccb3a0464c9c...</td>\n",
       "      <td>2014-03-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>9</td>\n",
       "      <td>学位论文如何才能挤出“水分”</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/1e8fa309bcf847b6ad...</td>\n",
       "      <td>2014-03-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>10</td>\n",
       "      <td>高校低年级学生频繁试水招聘会 专家：鼓励提前预热</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/3f34245a7cb449c99b...</td>\n",
       "      <td>2013-03-31</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>511 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index                                                 标题  \\\n",
       "0        0                    教育部党组《求是》撰文：精心谋划 切实抓好教育系统党史学习教育   \n",
       "1        1                     教育部长陈宝生：把巩固拓展作为开局之年工作主题，做到6个到位   \n",
       "2        2                 如何建设高质量教育体系？“十四五”规划和2035年远景目标纲要明确了   \n",
       "3        3                   教育部长陈宝生《旗帜》撰文：建设高质量教育体系，加快建成教育强国   \n",
       "4        4                        重磅！《推进粤港澳大湾区高等教育合作发展规划》正式印发   \n",
       "5        5                            教育部长陈宝生：大力提升青少年宪法法治教育质量   \n",
       "6        6                         教育系统如何学习贯彻五中全会精神？教育部最新通知来了   \n",
       "7        7                       教育部长陈宝生撰文：建设高质量教育体系  五中全会深解读   \n",
       "8        8         教育部长陈宝生：教育系统要找准学习领会切入点与维度，做到6个“理解”|五中全会大学习   \n",
       "9        9                         教育战线如何贯彻落实五中全会精神？教育部最新部署来了   \n",
       "10      10                     如何深化新时代教育评价改革？教育部11问答详解（附一图看懂）   \n",
       "11      11                        中共中央 国务院印发《深化新时代教育评价改革总体方案》   \n",
       "12      12  重磅！中办、国办印发《关于全面加强和改进新时代学校体育工作的意见》和《关于全面加强和改进新时...   \n",
       "13      13                        钟秉林、张志勇、沈炜解读：教育评价“指挥棒”全面转向！   \n",
       "14      14                       教育部党组：开启全面建设高素质专业化创新型教师队伍新征程   \n",
       "15      15                       教育部长陈宝生《求是》撰文：新时代建设教育强国的根本指针   \n",
       "16      16                      如何学习贯彻习近平总书记教师节重要寄语？教育部最新通知来了   \n",
       "17      17                      习近平主持召开科学家座谈会强调不断向科学技术广度和深度进军   \n",
       "18      18                                 《求是》杂志发表习近平总书记重要文章   \n",
       "19      19                                  来听！教育部党组成员这样讲专题党课   \n",
       "20       0                            10项要求！教育部发文，进一步加强高校法治工作   \n",
       "21       1                            教育部长陈宝生：把教育系统党的政治建设引向深入   \n",
       "22       2                        如何把新时代大中小学劳动教育落到实处？教育部9问答详解   \n",
       "23       3                         速递！教育部印发《大中小学劳动教育指导纲要（试行）》   \n",
       "24       4                  《人民日报》专访教育部总督学郑富芝：八方面加强督导 四举措问责到人   \n",
       "25       5                       速递！教育部等8部门全面部署加快和扩大新时代教育对外开放   \n",
       "26       6                 教育部长陈宝生：教育战线要重点瞄准三大目标，确保“收官之年”圆满收官   \n",
       "27       7                      教育部长陈宝生：抓准抓实，全面推进高校课程思政建设取得实效   \n",
       "28       8                            如何全面推进高校课程思政建设？教育部8问答详解   \n",
       "29       9                 教育部印发《高等学校课程思政建设指导纲要》，全面推进高校课程思政建设   \n",
       "..     ...                                                ...   \n",
       "341      1                              调查显示：过半应届生求职仍扎堆“北上广深”   \n",
       "342      2                             高端海归近5年回国3万 人数是前30年的3倍   \n",
       "343      3              习近平：青年要自觉践行社会主义核心价值观 ——在北京大学师生座谈会上的讲话   \n",
       "344      4                      大学毕业人数破700万 “隐性就业”能否避免“毕业即失业”   \n",
       "345      5                               中国高教界“大腕”聚昆共涉教改“深水区”   \n",
       "347      7                                调查显示八成应届生就业观求职中“生变”   \n",
       "346      6                                   广东为2.8万名贫困生张罗招聘会   \n",
       "348      8                         今年高校毕业生就业规模将达727万人 比去年增28万   \n",
       "349      9                                    大学生上课，费脑子还是费流量？   \n",
       "350     10                                   中国掀起“慕课”热潮重塑大学课堂   \n",
       "351     11                                    大学生创业享受优惠政策须持两证   \n",
       "352     12                            中国重点高校扩大贫困地区招生比例助农村学生圆梦   \n",
       "353     13                             教育部部署2014年高等学校科技改革重点工作   \n",
       "354     14                                    高等教育国际化须警惕过度商业化   \n",
       "355     15                               广东出台“特支计划”重点遴选培养杰出人才   \n",
       "356     16                            75所高校毕业生就业质量报告：须完善规范和标准   \n",
       "357     17                                  分类、开放将成中国高校科研评价方向   \n",
       "358     18                                北京6年投1.5亿元鼓励大学生科研创新   \n",
       "359     19                             就业决定招生：中国高校破解“就业难”的新尝试   \n",
       "360      0                             17高校面向内地招生 “港校北上潮”持续升温   \n",
       "361      1                              浙江将在高校推广“一站式”学生事务中心模式   \n",
       "362      2                                  中国高校研究生面临更高毕业“门槛”   \n",
       "364      4                             聚焦“就业季”：“先上大学后选专业”怎么样？   \n",
       "363      3                            75所教育部直属高校发布毕业生就业质量年度报告   \n",
       "365      5                              国家重点学科审批取消 没了指挥棒高校怎么办   \n",
       "366      6                               广东省教育厅：今年毕业生就业形势比去年好   \n",
       "367      7                            要求职业“高大上” 高校毕业生择业扎堆致就业难   \n",
       "368      8                教育部：预计今年贫困地区农村学生上重点高校的人数将比去年增加10%以上   \n",
       "369      9                                     学位论文如何才能挤出“水分”   \n",
       "370     10                           高校低年级学生频繁试水招聘会 专家：鼓励提前预热   \n",
       "\n",
       "                                                    链结          日期  \n",
       "0    https://www.nfu.edu.cn/gjdt/309be8b078444044b5...  2021-04-08  \n",
       "1    https://www.nfu.edu.cn/gjdt/159b20971f8b4051ba...  2021-03-20  \n",
       "2    https://www.nfu.edu.cn/gjdt/27ba495edc1b49f88b...  2021-03-15  \n",
       "3    https://www.nfu.edu.cn/gjdt/20dc120c250642cca5...  2021-01-05  \n",
       "4    https://www.nfu.edu.cn/gjdt/b43531427fb44695bb...  2020-12-22  \n",
       "5    https://www.nfu.edu.cn/gjdt/1509f4f3bc2f4babbe...  2020-12-15  \n",
       "6    https://www.nfu.edu.cn/gjdt/a4b2fb3dacae456497...  2020-11-20  \n",
       "7    https://www.nfu.edu.cn/gjdt/0e7d664c116f4a0ab9...  2020-11-12  \n",
       "8    https://www.nfu.edu.cn/gjdt/71e152ddce12414388...  2020-11-06  \n",
       "9    https://www.nfu.edu.cn/gjdt/0d7bfc95f70841a6b5...  2020-11-04  \n",
       "10   https://www.nfu.edu.cn/gjdt/750c396e278446e687...  2020-10-22  \n",
       "11   https://www.nfu.edu.cn/gjdt/cd3ecf8986ad40e991...  2020-10-22  \n",
       "12   https://www.nfu.edu.cn/gjdt/3a7835bded2441aeb6...  2020-10-21  \n",
       "13   https://www.nfu.edu.cn/gjdt/4d0eb3a8b8ee47f6b9...  2020-10-20  \n",
       "14   https://www.nfu.edu.cn/gjdt/355e3d6207974a3ea6...  2020-10-10  \n",
       "15   https://www.nfu.edu.cn/gjdt/9b6486d83f454a2ca7...  2020-09-22  \n",
       "16   https://www.nfu.edu.cn/gjdt/6bb7172f46b3458b80...  2020-09-12  \n",
       "17   https://www.nfu.edu.cn/gjdt/a8f6f2c6a2c644d299...  2020-09-11  \n",
       "18   https://www.nfu.edu.cn/gjdt/f978598b61024bb298...  2020-08-31  \n",
       "19   https://www.nfu.edu.cn/gjdt/3d9e9dfac159459386...  2020-08-28  \n",
       "20   https://www.nfu.edu.cn/gjdt/1c9574356724407990...  2020-08-10  \n",
       "21   https://www.nfu.edu.cn/gjdt/64834a92847843d1ae...  2020-07-28  \n",
       "22   https://www.nfu.edu.cn/gjdt/e5f4a1a9e8ae4e01b6...  2020-07-20  \n",
       "23   https://www.nfu.edu.cn/gjdt/b889fc2138324a5bab...  2020-07-17  \n",
       "24   https://www.nfu.edu.cn/gjdt/516ba03e05ce411fa9...  2020-07-02  \n",
       "25   https://www.nfu.edu.cn/gjdt/2980f80ba0e44882b6...  2020-06-19  \n",
       "26   https://www.nfu.edu.cn/gjdt/a3b10f27e6c645c9b5...  2020-06-16  \n",
       "27   https://www.nfu.edu.cn/gjdt/ddbb9af58dd14771a0...  2020-06-09  \n",
       "28   https://www.nfu.edu.cn/gjdt/b2d748f255db442d86...  2020-06-05  \n",
       "29   https://www.nfu.edu.cn/gjdt/e36ae0e40096485c95...  2020-06-05  \n",
       "..                                                 ...         ...  \n",
       "341  https://www.nfu.edu.cn/gjdt/b2058238570e48e496...  2014-05-08  \n",
       "342  https://www.nfu.edu.cn/gjdt/53f23d6dcf2c4764a6...  2014-05-08  \n",
       "343  https://www.nfu.edu.cn/gjdt/38795d5e83b34707bb...  2014-05-05  \n",
       "344  https://www.nfu.edu.cn/gjdt/4bcbfd4a23684b1281...  2014-05-04  \n",
       "345  https://www.nfu.edu.cn/gjdt/62e82445c20b40f5a0...  2014-04-30  \n",
       "347  https://www.nfu.edu.cn/gjdt/401b84f616944f559d...  2014-04-29  \n",
       "346  https://www.nfu.edu.cn/gjdt/a9f067fdd36a4abcb7...  2014-04-29  \n",
       "348  https://www.nfu.edu.cn/gjdt/b77384a898e444f19c...  2014-04-25  \n",
       "349  https://www.nfu.edu.cn/gjdt/dbd812f735e24e6cb9...  2014-04-25  \n",
       "350  https://www.nfu.edu.cn/gjdt/a2ea0bade0dc4ac0b3...  2014-04-21  \n",
       "351  https://www.nfu.edu.cn/gjdt/ee0cac181e8240cead...  2014-04-21  \n",
       "352  https://www.nfu.edu.cn/gjdt/9e113621b1bd444da9...  2014-04-18  \n",
       "353  https://www.nfu.edu.cn/gjdt/b9331db1468b4c2783...  2014-04-18  \n",
       "354  https://www.nfu.edu.cn/gjdt/e2a83d35ce63437e81...  2014-04-17  \n",
       "355  https://www.nfu.edu.cn/gjdt/a10bea05e10f4940b4...  2014-04-14  \n",
       "356  https://www.nfu.edu.cn/gjdt/da3b772713c34cb084...  2014-04-14  \n",
       "357  https://www.nfu.edu.cn/gjdt/fcfca6a4203f4b5586...  2014-04-14  \n",
       "358  https://www.nfu.edu.cn/gjdt/99099804ea794a3386...  2014-04-11  \n",
       "359  https://www.nfu.edu.cn/gjdt/46c2eec344ae4626ac...  2014-04-11  \n",
       "360  https://www.nfu.edu.cn/gjdt/48ecc7287ed14d3987...  2014-04-09  \n",
       "361  https://www.nfu.edu.cn/gjdt/870f9717698b45eb92...  2014-04-08  \n",
       "362  https://www.nfu.edu.cn/gjdt/b725709fcdc14df0b7...  2014-04-04  \n",
       "364  https://www.nfu.edu.cn/gjdt/cff97426f8fe472882...  2014-04-03  \n",
       "363  https://www.nfu.edu.cn/gjdt/94bad9400bf34913b4...  2014-04-03  \n",
       "365  https://www.nfu.edu.cn/gjdt/ebd7084c98754523b0...  2014-04-02  \n",
       "366  https://www.nfu.edu.cn/gjdt/3829e4c5df9e460abc...  2014-03-28  \n",
       "367  https://www.nfu.edu.cn/gjdt/776ebc41fae84b36a4...  2014-03-27  \n",
       "368  https://www.nfu.edu.cn/gjdt/41d339ccb3a0464c9c...  2014-03-25  \n",
       "369  https://www.nfu.edu.cn/gjdt/1e8fa309bcf847b6ad...  2014-03-24  \n",
       "370  https://www.nfu.edu.cn/gjdt/3f34245a7cb449c99b...  2013-03-31  \n",
       "\n",
       "[511 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "list_df = []\n",
    "\n",
    "\n",
    "files= os.listdir('html_out/gjdt/')\n",
    "print(files)\n",
    "\n",
    "for html in files:\n",
    "    with open('html_out/gjdt/'+html,encoding='utf8',mode='r') as fp:\n",
    "        html_load = fp.read()\n",
    "        parsed = requests_html.soup_parse(html_load)\n",
    "        list_URL = pages_content_url(parsed)\n",
    "        \n",
    "        df = pd.DataFrame( {\n",
    "         \"标题\": parsed.xpath(dict_xpath['标题_xpath']),\n",
    "         \"链结\": list_URL,\n",
    "         \"日期\": parsed.xpath(dict_xpath['日期_xpath']),\n",
    "        } )\n",
    "        list_df.append(df)\n",
    "\n",
    "        \n",
    "        \n",
    "df_all = pd.concat(list_df).reset_index().sort_values(by='日期',ascending=False)\n",
    "display(df_all)    \n",
    "\n",
    "df_all\n",
    "\n",
    "df_all.to_excel(\"data_out/nfu_gjdt_文学与传媒学院.xlsx\", sheet_name=\"高教动态\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "336px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
